Data Cleaning

getwd()
## [1] "/Users/victoriafield/Library/Mobile Documents/com~apple~CloudDocs/Masters Thesis/Chapter Two CSLAP Manuscript/R - data and outputs"
CSLAP<-read.csv("./raw_data/CSLAP 2012-2017 EDI Original.csv", na.strings = c("", " ", "NA", "NaN"))
str(CSLAP)
## 'data.frame':    5025 obs. of  47 variables:
##  $ LakeID                       : chr  "1005GLE0441" "1005GLE0441" "1005GLE0441" "1005GLE0441" ...
##  $ Lake.Name                    : chr  "Glen Lake" "Glen Lake" "Glen Lake" "Glen Lake" ...
##  $ Proj..Code                   : num  8 8 8 8 8 8 8 8 8 8 ...
##  $ Sample.Name                  : chr  "16-08-02" "16-08-03" "16-08-04" "16-08-05" ...
##  $ Sample.Date                  : chr  "07/04/2016" "07/11/2016" "07/26/2016" "08/01/2016" ...
##  $ Data.Provider                : chr  "CSL" "CSL" "CSL" "CSL" ...
##  $ Info.Type                    : chr  "OW" "OW" "OW" "OW" ...
##  $ LocationID                   : chr  "1005GLE0441_C" "1005GLE0441_C" "1005GLE0441_C" "1005GLE0441_C" ...
##  $ Latitude                     : num  43.4 43.4 43.4 43.4 43.4 ...
##  $ Longitude                    : num  -73.7 -73.7 -73.7 -73.7 -73.7 ...
##  $ Sample.Depth..m.             : num  1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 ...
##  $ Secchi.Depth..m.             : num  3.95 4 3.95 4 5.95 4.05 4.9 3.55 2.6 3.3 ...
##  $ DO..mg.L.                    : logi  NA NA NA NA NA NA ...
##  $ Bottom.DO..mg.L.             : logi  NA NA NA NA NA NA ...
##  $ pH                           : num  7.59 7.55 8.37 7.37 7.37 7.51 7.99 7.77 7.76 7.45 ...
##  $ TP..mg.L.                    : num  0.0068 0.0081 0.0077 0.0061 0.0063 0.0067 0.0076 0.0086 NA NA ...
##  $ TDP..mg.L.                   : logi  NA NA NA NA NA NA ...
##  $ Conductance..umho.cm.        : num  424 363 451 406 400 ...
##  $ Temperature..air..deg.C.     : num  21 23 25 22 17 25 26 24 22 21 ...
##  $ Temperature..water..deg.C.   : num  24 26 26 26 19 27 26 23 25 22 ...
##  $ TN..mg.L.                    : num  0.321 0.197 0.202 0.214 0.215 0.515 0.29 0.229 0.291 0.189 ...
##  $ TDN..mg.L.                   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Ammonia..mg.L.               : num  NA 0.028 NA 0.076 0.102 NA 0.0075 NA 0.022 0 ...
##  $ NOx..mg.L.                   : num  NA 0.0035 NA 0.034 0.029 NA 0.0035 NA 0.00275 NA ...
##  $ Nitrate..mg.L.               : logi  NA NA NA NA NA NA ...
##  $ Nitrite..mg.L.               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ True.Color..ptu.             : num  15 23 15 18 11 16 22 14 6 3 ...
##  $ Chloride..mg.L.              : num  NA 51.7 NA NA NA NA 74.1 NA NA NA ...
##  $ Calcium..mg.L.               : num  NA NA NA NA NA NA NA NA 21.3 NA ...
##  $ Chl.a..probe...ug.L.         : logi  NA NA NA NA NA NA ...
##  $ Extracted.Chl.a..ug.L.       : num  0.8 1.7 1.5 0.6 1.1 1.7 1.5 3.8 13.3 3.2 ...
##  $ UFI.SBK.FP.Chl.a..ug.L.      : logi  NA NA NA NA NA NA ...
##  $ UFI.SBK.FP.BGA..ug.L.        : logi  NA NA NA NA NA NA ...
##  $ X.UFI.SBK.Microcystin..ug.L..: logi  NA NA NA NA NA NA ...
##  $ ESF.FP.Chl.a..ug.L.          : num  NA NA NA NA NA NA NA NA 2.45 2.21 ...
##  $ ESF.FP.BGA..ug.L.            : num  0 0 0.36 0.2 0 0.17 0.19 0.42 0 0 ...
##  $ ESF.Microcystin..ug.L.       : chr  "ND" "ND" "ND" "ND" ...
##  $ HAB.Status                   : chr  NA NA NA "No Bloom" ...
##  $ Alkalinity.as.CaCO3..mg.L.   : logi  NA NA NA NA NA NA ...
##  $ DOC..mg.L.                   : logi  NA NA NA NA NA NA ...
##  $ UV.254..1.cm.                : logi  NA NA NA NA NA NA ...
##  $ Iron..ug.L.                  : logi  NA NA NA NA NA NA ...
##  $ Manganese..ug.L.             : logi  NA NA NA NA NA NA ...
##  $ Arsenic..ug.L.               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ QA                           : int  3 2 2 2 NA 2 2 2 3 2 ...
##  $ QB                           : int  3 3 3 3 NA 4 3 3 3 3 ...
##  $ QC                           : int  3 2 3 3 NA 2 2 3 3 2 ...

Based on output from str() and prior knowledge of the data, to clean this dataset I need to:

-Change column names and remove units

-Identify duplicate levels of Lake_Name, we know there is a duplicate because we have 73 levels of the unique factor LakeID and 72 levels of Lake_Name

-Split Sample_Date variable into two more columns: Sample_Year and Sample_Month

-For each lake, identify how many years the observations span (If less than 3, remove from dataset)

-Clean levels of Data_Provider to one level (“CSL”)

-Clean levels of Info_Type

-Remove character strings from ESF_Microcystin_ug.L

-Read in a merge values for Mean Depth, Catchment Area to Surface Area Ratio, Dreissenid Status By Year, and %Agricultural Cover

Change column names

colnames(CSLAP)<-c("LakeID", "Lake_Name", "Proj_Code", "Sample_Name", "Sample_Date", "Data_Provider", "Info_Type", "LocationID", "Latitude", "Longitude", "Sample_Depth", "Secchi_Depth", "DO", "Bottom_DO","pH", "TP", "TDP", "Conductance", "Temp_Air", "Temp_Water", "TN", "TDN" , "Ammonia", "NOx", "Nitrate", "Nitrite", "True_Color", "Chloride", "Calcium", "Chl.a_FP", "Extracted_Chl.a", "UFI_SBK_FP_Chl.a", "UFI_SBK_FP_BGA", "UFI_SBK_Microcystin", "ESF_FP_Chl.a","ESF_FP_BGA", "ESF_Microcystin", "HAB_Status", "Alkalinity_CaCO3", "DOC", "UV_254", "Iron", "Manganese", "Arsenic", "QA", "QB", "QC")

Change duplicate Lake_Name:“Forest Lake” and remove white space from Lake_Name: “Hadlock Pond”

We want the lake with LakeID:“1301FOR0441” to be called “Lake Forest” instead of “Forest Lake”. And we need to change “Hadlock Pond” to “Hadlock Pond”

CSLAP$Lake_Name<-as.character(CSLAP$Lake_Name) #first, make this column a character vector instead of factor
CSLAP[CSLAP$LakeID == "1301FOR0441", 2] <- rep("Lake Forest", 40) #then, replace the specified cells with "Lake Forest"
CSLAP[CSLAP$LakeID == "1005HAD0432", 2] <- rep("Hadlock Pond", 78) #then, replace the specified cells with "Hadlock Pond"
CSLAP$Lake_Name<-as.factor(CSLAP$Lake_Name) #last, change the column back to factor vector

Create new columns for Sample_Year and Sample_Month

#extract sample year and month 
CSLAP$Sample_Year<-as.numeric(format(as.Date(CSLAP$Sample_Date, format="%m/%d/%Y"),"%Y"))
CSLAP$Sample_Month<-as.numeric(format(as.Date(CSLAP$Sample_Date, format="%m/%d/%Y"),"%m"))

For each lake, check how many years they were enrolled in CSLAP. Remove lakes with less than 3 years

table(CSLAP$LakeID, CSLAP$Sample_Year)
##               
##                2012 2013 2014 2015 2016 2017
##   0201CUB0115    16   16   16   16   16   16
##   0303LOR0009     8    8    0    8    8    8
##   0403RUS0146     0    0    0    0   16   16
##   0404LOO0079     4    8    8   16   16   16
##   0601CHE0213    17   17   16   16   17   16
##   0601GUI0188    16   16   16   13   17   16
##   0602BRA0154    13   12   16    0   19   16
##   0602CRO0073    12   13    4   13   17   14
##   0602EAR0146    22   26    3   20   18   16
##   0602EAT0163    13   12    0   16   16   16
##   0602ECH0081    12   17   17   17    0    0
##   0602GEN0088    12   14   16   18   18    0
##   0602HAT0155    15   17   17    0   16   16
##   0602PET0078    16   12   17   14   17   16
##   0602PLY0107     8    8    8    8   10    8
##   0602SON0072    18    2   23   21   20   16
##   0602TUL0068    10   12   16   17   18   16
##   0602ULI0067     9   12    0   18   17   16
##   0602WAR0094     8   13   16   16   17   16
##   0703CAZ0153    18   24   25    2   23   16
##   0703DER0139A   12   12   16   16   16   16
##   0703KAS0109     7    9    8    8    0    8
##   0703PAN0094     0    0    8    7    8   10
##   0703TUS0153A   12   14    0   16   14   16
##   0704CAN0286     0    0    0    0    1    0
##   0801BRA0689    16   16   16    0   16   16
##   0801EFF0426     0    8    8    0    0    0
##   0801OTT0926     0    8    0    8    8    8
##   0902EAG0083     9    8    0    8   11    9
##   0904SIL0351    10    8    8    9    8    8
##   0906BON0024    12   12   16   14   16   17
##   0906GRA0051     0   10   15   14    8   16
##   0906MIL0055    12   12   16   16   12   16
##   0906OTH0009     0    0    0   19   17   16
##   1004AUG0213    16   16    3   17   16   16
##   1004LIN0315     0   11   16   16   15    0
##   1004PLA0254    24   17   14   22   12   12
##   1005GLE0441    11   18   14   16   16   16
##   1005HAD0432    12   16   17   17    0   16
##   1005SUN0440    15   22   18   15    0   16
##   1102BAB1109    16   18   17   17   16   16
##   1102TAC1129     0    5    5    4    6    6
##   1104EAG0438    12   13   14   12   16   16
##   1104EFN0129    12   12   15   16   16    0
##   1104FOR0340     4    6    4    6    6    7
##   1104FRI0365    16   16   16    0    8    8
##   1104HUN0131    12   12    0   16   16   16
##   1104JEN0130    12   12   15    0   13   16
##   1104PAR0432     0   28    0    0    0    0
##   1104PLE0313     0    0   16   16   16    0
##   1104SAC0314    16   16    0    0    0   16
##   1104SCH0374    28   23   12   33   32   32
##   1201CAN0717    12   12   15   14   15   16
##   1201ECA0697    13   10   17   16   16   16
##   1201GAL0563    12   13   16   16   16   16
##   1201PEC0686     8    0    8   12   16   16
##   1201PLE0745     0   12   16   12    8    8
##   1301BOW0444     0    9    9    8   14    8
##   1301BUR0386C    0   13    9   15   16   16
##   1301FOR0441     0    8    8    7    9    8
##   1301IND0167     0    5    9    7   16   17
##   1301ROA0183B    9    8    0   16   10   10
##   1301SEP0826     0   16   13   15   16   10
##   1310QUE0057    13   12   17   19   14   16
##   1310ROU0080     0    6    7    7    6    8
##   1310SPR0081     0    0   16   16   16   16
##   1401ANA0251    12   14   16   14   16   14
##   1401DEV0174     0    0   16   16   17   12
##   1401SOM0268    24   24   17   16   17   16
##   1402YAN0021     0    8    8    6    0    8
##   1404OQU0383    16   16   16   16   14   16
##   1501TUX1007    16    0   16   17   12   14
##   1701LLO0708     8    8    8    7    8    8
CSLAP$LakeID<-as.character(CSLAP$LakeID) #first, change to a character vector

CSLAP<-CSLAP[CSLAP$LakeID != "0403RUS0146", ]
CSLAP<-CSLAP[CSLAP$LakeID != "0704CAN0286", ] 
CSLAP<-CSLAP[CSLAP$LakeID != "0801EFF0426", ] 
CSLAP<-CSLAP[CSLAP$LakeID != "1104PAR0432", ] 
CSLAP<-CSLAP[CSLAP$LakeID != "0602PLY0107", ] #Plymouth reservoir will also be removed because it is eutrophic

CSLAP$LakeID<-as.factor(CSLAP$LakeID)
CSLAP$LakeID<-droplevels(CSLAP$LakeID)
levels(CSLAP$LakeID)
##  [1] "0201CUB0115"  "0303LOR0009"  "0404LOO0079"  "0601CHE0213"  "0601GUI0188" 
##  [6] "0602BRA0154"  "0602CRO0073"  "0602EAR0146"  "0602EAT0163"  "0602ECH0081" 
## [11] "0602GEN0088"  "0602HAT0155"  "0602PET0078"  "0602SON0072"  "0602TUL0068" 
## [16] "0602ULI0067"  "0602WAR0094"  "0703CAZ0153"  "0703DER0139A" "0703KAS0109" 
## [21] "0703PAN0094"  "0703TUS0153A" "0801BRA0689"  "0801OTT0926"  "0902EAG0083" 
## [26] "0904SIL0351"  "0906BON0024"  "0906GRA0051"  "0906MIL0055"  "0906OTH0009" 
## [31] "1004AUG0213"  "1004LIN0315"  "1004PLA0254"  "1005GLE0441"  "1005HAD0432" 
## [36] "1005SUN0440"  "1102BAB1109"  "1102TAC1129"  "1104EAG0438"  "1104EFN0129" 
## [41] "1104FOR0340"  "1104FRI0365"  "1104HUN0131"  "1104JEN0130"  "1104PLE0313" 
## [46] "1104SAC0314"  "1104SCH0374"  "1201CAN0717"  "1201ECA0697"  "1201GAL0563" 
## [51] "1201PEC0686"  "1201PLE0745"  "1301BOW0444"  "1301BUR0386C" "1301FOR0441" 
## [56] "1301IND0167"  "1301ROA0183B" "1301SEP0826"  "1310QUE0057"  "1310ROU0080" 
## [61] "1310SPR0081"  "1401ANA0251"  "1401DEV0174"  "1401SOM0268"  "1402YAN0021" 
## [66] "1404OQU0383"  "1501TUX1007"  "1701LLO0708"

Change levels of Data_Provider

levels(CSLAP$Data_Provider)
## NULL
CSLAP$Data_Provider<-as.character(CSLAP$Data_Provider)
CSLAP[CSLAP$Data_Provider == "csl", 6] <- "CSL"
CSLAP$Data_Provider<-as.factor(CSLAP$Data_Provider)
CSLAP$Data_Provider<-droplevels(CSLAP$Data_Provider)
levels(CSLAP$Data_Provider)
## [1] "CSL"

Change levels of Info_Type

levels(CSLAP$Info_Type)
## NULL
CSLAP$Info_Type<-as.character(CSLAP$Info_Type)
CSLAP[CSLAP$Info_Type == "ow", 7] <- "OW"
CSLAP[CSLAP$Info_Type == "sb", 7] <- "SB"
CSLAP$Info_Type<-as.factor(CSLAP$Info_Type)
CSLAP$Info_Type<-droplevels(CSLAP$Info_Type)
levels(CSLAP$Info_Type)
## [1] "BS" "OW" "RT" "SB"

Clean ESF_Microcystin vector

# ESF_Microcystin should be a numeric column,but it's listed as factor. We need to remove character values from this column 

CSLAP$ESF_Microcystin[CSLAP$ESF_Microcystin == "ND"] <- NA
CSLAP$ESF_Microcystin[CSLAP$ESF_Microcystin == "no filter"] <- NA

CSLAP$ESF_Microcystin <- as.numeric(as.character(CSLAP$ESF_Microcystin))

str(CSLAP$ESF_Microcystin)
##  num [1:4898] NA NA NA NA NA NA NA NA NA NA ...

Add TN:TP column

CSLAP$TN_TP<-CSLAP$TN/CSLAP$TP

Check for missing values

result <- lapply(CSLAP, identifyMissing)
result
## $LakeID
## No problems found.
## $Lake_Name
## No problems found.
## $Proj_Code
## No problems found.
## $Sample_Name
## No problems found.
## $Sample_Date
## No problems found.
## $Data_Provider
## No problems found.
## $Info_Type
## No problems found.
## $LocationID
## No problems found.
## $Latitude
## No problems found.
## $Longitude
## No problems found.
## $Sample_Depth
## No problems found.
## $Secchi_Depth
## No problems found.
## $DO
## No problems found.
## $Bottom_DO
## No problems found.
## $pH
## No problems found.
## $TP
## No problems found.
## $TDP
## No problems found.
## $Conductance
## No problems found.
## $Temp_Air
## No problems found.
## $Temp_Water
## No problems found.
## $TN
## No problems found.
## $TDN
## No problems found.
## $Ammonia
## No problems found.
## $NOx
## No problems found.
## $Nitrate
## No problems found.
## $Nitrite
## No problems found.
## $True_Color
## No problems found.
## $Chloride
## No problems found.
## $Calcium
## No problems found.
## $Chl.a_FP
## No problems found.
## $Extracted_Chl.a
## No problems found.
## $UFI_SBK_FP_Chl.a
## No problems found.
## $UFI_SBK_FP_BGA
## No problems found.
## $UFI_SBK_Microcystin
## No problems found.
## $ESF_FP_Chl.a
## No problems found.
## $ESF_FP_BGA
## No problems found.
## $ESF_Microcystin
## No problems found.
## $HAB_Status
## No problems found.
## $Alkalinity_CaCO3
## No problems found.
## $DOC
## No problems found.
## $UV_254
## No problems found.
## $Iron
## No problems found.
## $Manganese
## No problems found.
## $Arsenic
## No problems found.
## $QA
## No problems found.
## $QB
## No problems found.
## $QC
## No problems found.
## $Sample_Year
## No problems found.
## $Sample_Month
## No problems found.
## $TN_TP
## The following suspected missing value codes enter as regular values: Inf, NaN.
CSLAP$TN_TP[CSLAP$TN_TP == "Inf"] <- NA
CSLAP$TN_TP[CSLAP$TN_TP == "NaN"] <- NA

result <- lapply(CSLAP, identifyMissing)
result
## $LakeID
## No problems found.
## $Lake_Name
## No problems found.
## $Proj_Code
## No problems found.
## $Sample_Name
## No problems found.
## $Sample_Date
## No problems found.
## $Data_Provider
## No problems found.
## $Info_Type
## No problems found.
## $LocationID
## No problems found.
## $Latitude
## No problems found.
## $Longitude
## No problems found.
## $Sample_Depth
## No problems found.
## $Secchi_Depth
## No problems found.
## $DO
## No problems found.
## $Bottom_DO
## No problems found.
## $pH
## No problems found.
## $TP
## No problems found.
## $TDP
## No problems found.
## $Conductance
## No problems found.
## $Temp_Air
## No problems found.
## $Temp_Water
## No problems found.
## $TN
## No problems found.
## $TDN
## No problems found.
## $Ammonia
## No problems found.
## $NOx
## No problems found.
## $Nitrate
## No problems found.
## $Nitrite
## No problems found.
## $True_Color
## No problems found.
## $Chloride
## No problems found.
## $Calcium
## No problems found.
## $Chl.a_FP
## No problems found.
## $Extracted_Chl.a
## No problems found.
## $UFI_SBK_FP_Chl.a
## No problems found.
## $UFI_SBK_FP_BGA
## No problems found.
## $UFI_SBK_Microcystin
## No problems found.
## $ESF_FP_Chl.a
## No problems found.
## $ESF_FP_BGA
## No problems found.
## $ESF_Microcystin
## No problems found.
## $HAB_Status
## No problems found.
## $Alkalinity_CaCO3
## No problems found.
## $DOC
## No problems found.
## $UV_254
## No problems found.
## $Iron
## No problems found.
## $Manganese
## No problems found.
## $Arsenic
## No problems found.
## $QA
## No problems found.
## $QB
## No problems found.
## $QC
## No problems found.
## $Sample_Year
## No problems found.
## $Sample_Month
## No problems found.
## $TN_TP
## No problems found.

Read in and merge mean depth, CA:SA, dreissenids by year, and %ag

Dreissenids<-read.csv("./raw_data/Dreissenid_Status_byYear.csv", header=TRUE)
Dreissenids$Lake_Name<-as.character(Dreissenids$Lake_Name) 
Dreissenids[200:204,2] <- rep("Hadlock Pond",5)
Dreissenids$Lake_Name<-as.factor(Dreissenids$Lake_Name)

CASA_MD<-read.csv("./raw_data/CASA_Mean_Depth.csv", header=TRUE)
CASA_MD$Lake_Name<-as.character(CASA_MD$Lake_Name) 
CASA_MD[27,2] <- "Hadlock Pond"
CASA_MD$Lake_Name<-as.factor(CASA_MD$Lake_Name)

Ag<-read.csv("./raw_data/Percent Ag Cover.csv", header=TRUE)
Ag$Lake_Name<-as.character(Ag$Lake_Name) 
Ag[27,1] <- "Hadlock Pond"
Ag$Lake_Name<-as.factor(Ag$Lake_Name)

CSLAP<-merge(CSLAP, Dreissenids, by=c("LakeID", "Lake_Name", "Sample_Year"), all.x =TRUE)
CSLAP<-merge(CSLAP, CASA_MD, by=c("LakeID", "Lake_Name"), all.x=TRUE)
CSLAP<-merge(CSLAP, Ag, by="Lake_Name", all.x=TRUE)

Now we have a clean, full dataset. Next, we need to create the reduced dataset using propensity score matching

Create Reduced Dataset

Read in lake data

Lake_List<-read.csv("./raw_data/LakeList.csv", header=TRUE)
Lake_List<-merge(Lake_List, Ag, by="Lake_Name", na.rm=FALSE, all.x=TRUE) #merge %ag
Lake_List<-Lake_List[Lake_List$LakeID != "0403RUS0146",] #Remove Lake Rushford

Propensity Score Estimation

m_ps <- glm(Dreissenids ~ Mean_Depth_m + CA.SA + Percent_Ag,
            family = binomial(), data = Lake_List)
summary(m_ps)
## 
## Call:
## glm(formula = Dreissenids ~ Mean_Depth_m + CA.SA + Percent_Ag, 
##     family = binomial(), data = Lake_List)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2155  -0.4089  -0.3318  -0.3144   2.4219  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  -3.113660   0.954012  -3.264  0.00110 **
## Mean_Depth_m  0.019552   0.131026   0.149  0.88138   
## CA.SA         0.005799   0.010392   0.558  0.57684   
## Percent_Ag    0.068900   0.026159   2.634  0.00844 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 49.008  on 66  degrees of freedom
## Residual deviance: 40.860  on 63  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 48.86
## 
## Number of Fisher Scoring iterations: 5
prs_df <- data.frame(pr_score = predict(m_ps, type = "response"),
                     Dreissenids = m_ps$model$Dreissenids)
head(prs_df)
##     pr_score Dreissenids
## 1 0.04688800           0
## 2 0.04809539           0
## 3 0.05508699           0
## 4 0.16993506           0
## 5 0.05479752           0
## 6 0.07388909           0
labs <- paste("Invasion Status:", c("Invaded", "Uninvaded"))
prs_df %>%
  mutate(Dreissenids = ifelse(Dreissenids == 1, labs[1], labs[2])) %>%
  ggplot(aes(x = pr_score)) +
  geom_histogram(color = "white") +
  facet_wrap(~Dreissenids) +
  xlab("Probability of being invaded") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

lakes_nomiss <- Lake_List %>%  # MatchIt does not allow missing values
  dplyr::select(CA.SA, Mean_Depth_m, Percent_Ag, LakeID, Lake_Name, Dreissenids) %>%
  na.omit()

mod_match <- matchit(Dreissenids ~ CA.SA + Mean_Depth_m + Percent_Ag,
                     method = "nearest", data = lakes_nomiss)

df_match <- match.data(mod_match)

Extract observations that come from lakes in the reduced set to create new, clean reduced dataset

redLakes<-df_match[,c(4,6)]

redCSLAP<-merge(CSLAP, redLakes, by="LakeID")

redCSLAP<-redCSLAP[,1:54] #removed redundant Dreissenids column

colnames(redCSLAP)[51] <- "Dreissenids"

Data Summaries and Histograms

Global Dataset

Data Summaries

#melt data frame by Dreissenids and Info_type so we can summarize all our variables of interest in one code chunk 
CSLAP2<-CSLAP[, c(8, 13, 17, 21, 22, 28, 32, 36, 37, 38, 50:54)]
meltCSLAP <- melt(CSLAP2, id=c("Dreissenids", "Info_Type"), na.rm=TRUE)

#summarize each variable split by invasion status and info
VariableSummaries<-ddply(meltCSLAP, c("Dreissenids", "Info_Type", "variable"), summarize,
      Mean = mean(value, na.rm=TRUE), 
      Median = median(value, na.rm=TRUE), 
      Min = min(value, na.rm=TRUE), 
      Max = max(value, na.rm=TRUE), 
      SD = sd(value, na.rm=TRUE))
VariableSummaries
##    Dreissenids Info_Type        variable         Mean     Median        Min
## 1      Invaded        BS              TP 3.945294e-02   0.019850  0.0000000
## 2      Invaded        BS      Temp_Water 1.511196e+01  15.000000  5.0000000
## 3      Invaded        BS           CA.SA 2.788857e+01   6.507353  4.4873502
## 4      Invaded        BS    Mean_Depth_m 6.232740e+00   6.200000  3.7000000
## 5      Invaded        BS      Percent_Ag 2.046619e+01  20.000000  1.0000000
## 6      Invaded        OW    Secchi_Depth 4.536559e+00   4.400000  1.1000000
## 7      Invaded        OW              TP 1.276286e-02   0.011000  0.0036000
## 8      Invaded        OW      Temp_Water 2.330388e+01  24.000000 10.0000000
## 9      Invaded        OW              TN 5.232666e-01   0.368810  0.0000000
## 10     Invaded        OW      True_Color 1.295292e+01  11.000000  0.5000000
## 11     Invaded        OW Extracted_Chl.a 2.372222e+00   1.900000  0.0500000
## 12     Invaded        OW    ESF_FP_Chl.a 1.945223e+00   1.600000  0.0000000
## 13     Invaded        OW      ESF_FP_BGA 4.813312e-01   0.100000  0.0000000
## 14     Invaded        OW ESF_Microcystin 6.437536e-01   0.440000  0.3285191
## 15     Invaded        OW           TN_TP 4.676542e+01  31.182701  0.0000000
## 16     Invaded        OW           CA.SA 2.823056e+01  10.780370  4.4873502
## 17     Invaded        OW    Mean_Depth_m 6.233231e+00   6.200000  3.7000000
## 18     Invaded        OW      Percent_Ag 1.934462e+01  20.000000  1.0000000
## 19     Invaded        SB    Secchi_Depth 2.750000e+00   2.750000  2.7500000
## 20     Invaded        SB    ESF_FP_Chl.a 2.138201e+03 142.635000  0.2000000
## 21     Invaded        SB      ESF_FP_BGA 2.002831e+03  85.475000  0.0000000
## 22     Invaded        SB ESF_Microcystin 7.493377e+01  43.840108  0.8346926
## 23     Invaded        SB           CA.SA 1.424389e+01   4.807692  4.6535893
## 24     Invaded        SB    Mean_Depth_m 5.713333e+00   7.100000  3.7000000
## 25     Invaded        SB      Percent_Ag 3.256667e+01  21.500000 20.0000000
## 26   Uninvaded        BS    Secchi_Depth 5.383333e+00   5.350000  5.1000000
## 27   Uninvaded        BS              TP 5.579272e-02   0.015000  0.0000000
## 28   Uninvaded        BS      Temp_Water 1.347582e+01  13.000000  3.0000000
## 29   Uninvaded        BS              TN 3.655000e-01   0.365500  0.3580000
## 30   Uninvaded        BS      True_Color 1.300000e+01  13.000000  9.0000000
## 31   Uninvaded        BS Extracted_Chl.a 1.600000e+00   1.600000  1.6000000
## 32   Uninvaded        BS    ESF_FP_Chl.a 5.300000e-01   0.570000  0.3900000
## 33   Uninvaded        BS      ESF_FP_BGA 0.000000e+00   0.000000  0.0000000
## 34   Uninvaded        BS           TN_TP 1.234775e+01  12.347749  8.1978022
## 35   Uninvaded        BS           CA.SA 1.816096e+01   7.732739  0.7403433
## 36   Uninvaded        BS    Mean_Depth_m 7.083978e+00   5.500000  2.6000000
## 37   Uninvaded        BS      Percent_Ag 8.507919e+00   1.000000  0.0000000
## 38   Uninvaded        OW    Secchi_Depth 4.075063e+00   3.800000  1.0000000
## 39   Uninvaded        OW              TP 1.207152e-02   0.009700  0.0000000
## 40   Uninvaded        OW      Temp_Water 2.271985e+01  23.000000  7.0000000
## 41   Uninvaded        OW              TN 3.831279e-01   0.325000  0.0000000
## 42   Uninvaded        OW      True_Color 1.578847e+01  13.000000  0.5000000
## 43   Uninvaded        OW Extracted_Chl.a 3.377997e+00   2.400000  0.0500000
## 44   Uninvaded        OW    ESF_FP_Chl.a 1.007138e+01   1.700000  0.0000000
## 45   Uninvaded        OW      ESF_FP_BGA 2.181487e+00   0.090000  0.0000000
## 46   Uninvaded        OW ESF_Microcystin 1.188658e+00   0.407566  0.1600000
## 47   Uninvaded        OW           TN_TP 4.209054e+01  32.444444  1.0840999
## 48   Uninvaded        OW           CA.SA 1.883358e+01   8.945022  0.7403433
## 49   Uninvaded        OW    Mean_Depth_m 5.966943e+00   4.700000  0.4000000
## 50   Uninvaded        OW      Percent_Ag 6.657864e+00   1.000000  0.0000000
## 51   Uninvaded        RT           CA.SA 8.140788e+01  81.407877 81.4078774
## 52   Uninvaded        RT    Mean_Depth_m 1.700000e+01  17.000000 17.0000000
## 53   Uninvaded        RT      Percent_Ag 1.000000e+00   1.000000  1.0000000
## 54   Uninvaded        SB    Secchi_Depth 3.600000e+00   2.500000  1.7000000
## 55   Uninvaded        SB    ESF_FP_Chl.a 2.791198e+03 171.710000  0.2800000
## 56   Uninvaded        SB      ESF_FP_BGA 2.309770e+03  26.870000  0.0000000
## 57   Uninvaded        SB ESF_Microcystin 2.086218e+03 100.355000  0.8100000
## 58   Uninvaded        SB           CA.SA 9.269072e+00   6.057692  0.7403433
## 59   Uninvaded        SB    Mean_Depth_m 5.083735e+00   4.600000  0.4000000
## 60   Uninvaded        SB      Percent_Ag 9.583851e+00   4.000000  0.0000000
##            Max           SD
## 1      0.37420 5.324365e-02
## 2     25.00000 5.298994e+00
## 3    189.85507 5.436477e+01
## 4      9.90000 1.727590e+00
## 5     48.00000 1.550414e+01
## 6      9.10000 1.395980e+00
## 7      0.06640 7.093941e-03
## 8     36.00000 2.860583e+00
## 9      4.12400 4.399775e-01
## 10    55.00000 9.245752e+00
## 11    13.30000 1.839573e+00
## 12    12.15000 1.634573e+00
## 13    10.36000 9.835983e-01
## 14     1.84000 5.345261e-01
## 15   240.59524 4.110511e+01
## 16   189.85507 5.391221e+01
## 17     9.90000 1.697457e+00
## 18    48.00000 1.529962e+01
## 19     2.75000           NA
## 20 31507.50000 6.226951e+03
## 21 31507.50000 6.221888e+03
## 22   361.24104 9.244751e+01
## 23   189.85507 4.064373e+01
## 24     9.90000 1.901513e+00
## 25    48.00000 1.359881e+01
## 26     5.70000 3.013857e-01
## 27     3.90960 1.657730e-01
## 28    29.00000 5.807561e+00
## 29     0.37300 1.060660e-02
## 30    17.00000 4.000000e+00
## 31     1.60000           NA
## 32     0.63000 1.249000e-01
## 33     0.00000 0.000000e+00
## 34    16.49770 5.868911e+00
## 35   210.42471 3.484989e+01
## 36    21.30000 4.142202e+00
## 37    44.00000 1.271529e+01
## 38    13.60000 1.697343e+00
## 39     0.21590 1.094758e-02
## 40    34.00000 3.428063e+00
## 41    12.93200 4.134485e-01
## 42   100.00000 1.073302e+01
## 43    63.20000 3.682886e+00
## 44  3438.50000 1.476402e+02
## 45  2564.00000 5.420867e+01
## 46    52.86975 5.577302e+00
## 47  7936.00000 1.705462e+02
## 48   210.42471 3.385397e+01
## 49    21.30000 4.112813e+00
## 50    44.00000 1.106120e+01
## 51    81.40788           NA
## 52    17.00000           NA
## 53     1.00000           NA
## 54     6.60000 2.628688e+00
## 55 74781.25000 9.683792e+03
## 56 74781.25000 9.173332e+03
## 57 39757.12502 7.757750e+03
## 58    54.94505 7.784528e+00
## 59    13.00000 2.380611e+00
## 60    41.00000 1.102359e+01
#I will pull out only the values that I need to make copying to a word document "easy"
VariableSummaries[c(1, 6:11,15:18, 20:22, 27, 38:34, 47:50, 55:57),]
##    Dreissenids Info_Type        variable         Mean     Median        Min
## 1      Invaded        BS              TP 3.945294e-02   0.019850  0.0000000
## 6      Invaded        OW    Secchi_Depth 4.536559e+00   4.400000  1.1000000
## 7      Invaded        OW              TP 1.276286e-02   0.011000  0.0036000
## 8      Invaded        OW      Temp_Water 2.330388e+01  24.000000 10.0000000
## 9      Invaded        OW              TN 5.232666e-01   0.368810  0.0000000
## 10     Invaded        OW      True_Color 1.295292e+01  11.000000  0.5000000
## 11     Invaded        OW Extracted_Chl.a 2.372222e+00   1.900000  0.0500000
## 15     Invaded        OW           TN_TP 4.676542e+01  31.182701  0.0000000
## 16     Invaded        OW           CA.SA 2.823056e+01  10.780370  4.4873502
## 17     Invaded        OW    Mean_Depth_m 6.233231e+00   6.200000  3.7000000
## 18     Invaded        OW      Percent_Ag 1.934462e+01  20.000000  1.0000000
## 20     Invaded        SB    ESF_FP_Chl.a 2.138201e+03 142.635000  0.2000000
## 21     Invaded        SB      ESF_FP_BGA 2.002831e+03  85.475000  0.0000000
## 22     Invaded        SB ESF_Microcystin 7.493377e+01  43.840108  0.8346926
## 27   Uninvaded        BS              TP 5.579272e-02   0.015000  0.0000000
## 38   Uninvaded        OW    Secchi_Depth 4.075063e+00   3.800000  1.0000000
## 37   Uninvaded        BS      Percent_Ag 8.507919e+00   1.000000  0.0000000
## 36   Uninvaded        BS    Mean_Depth_m 7.083978e+00   5.500000  2.6000000
## 35   Uninvaded        BS           CA.SA 1.816096e+01   7.732739  0.7403433
## 34   Uninvaded        BS           TN_TP 1.234775e+01  12.347749  8.1978022
## 47   Uninvaded        OW           TN_TP 4.209054e+01  32.444444  1.0840999
## 48   Uninvaded        OW           CA.SA 1.883358e+01   8.945022  0.7403433
## 49   Uninvaded        OW    Mean_Depth_m 5.966943e+00   4.700000  0.4000000
## 50   Uninvaded        OW      Percent_Ag 6.657864e+00   1.000000  0.0000000
## 55   Uninvaded        SB    ESF_FP_Chl.a 2.791198e+03 171.710000  0.2800000
## 56   Uninvaded        SB      ESF_FP_BGA 2.309770e+03  26.870000  0.0000000
## 57   Uninvaded        SB ESF_Microcystin 2.086218e+03 100.355000  0.8100000
##           Max           SD
## 1      0.3742 5.324365e-02
## 6      9.1000 1.395980e+00
## 7      0.0664 7.093941e-03
## 8     36.0000 2.860583e+00
## 9      4.1240 4.399775e-01
## 10    55.0000 9.245752e+00
## 11    13.3000 1.839573e+00
## 15   240.5952 4.110511e+01
## 16   189.8551 5.391221e+01
## 17     9.9000 1.697457e+00
## 18    48.0000 1.529962e+01
## 20 31507.5000 6.226951e+03
## 21 31507.5000 6.221888e+03
## 22   361.2410 9.244751e+01
## 27     3.9096 1.657730e-01
## 38    13.6000 1.697343e+00
## 37    44.0000 1.271529e+01
## 36    21.3000 4.142202e+00
## 35   210.4247 3.484989e+01
## 34    16.4977 5.868911e+00
## 47  7936.0000 1.705462e+02
## 48   210.4247 3.385397e+01
## 49    21.3000 4.112813e+00
## 50    44.0000 1.106120e+01
## 55 74781.2500 9.683792e+03
## 56 74781.2500 9.173332e+03
## 57 39757.1250 7.757750e+03
#Write this dataframe to a csv so the values can be easily put into a word document 
write.csv(VariableSummaries, "./output_data/VariableSummaries-June2020.csv")

Histograms

First we will split our dataset based on information type

OWCSLAP<-CSLAP[CSLAP$Info_Type == "OW",]
BSCSLAP<-CSLAP[CSLAP$Info_Type == "BS",]
SBCSLAP<-CSLAP[CSLAP$Info_Type == "SB",]

redOWCSLAP<-redCSLAP[redCSLAP$Info_Type == "OW",]
redBSCSLAP<-redCSLAP[redCSLAP$Info_Type == "BS",]
redBSCSLAP<-redCSLAP[redCSLAP$Info_Type == "SB",]

Open Water TP

ggplot(OWCSLAP, aes(x=TP)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TP", x="TP (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 49 rows containing non-finite values (stat_bin).
## Warning: Removed 49 rows containing non-finite values (stat_density).

Open Water TN

ggplot(OWCSLAP, aes(x=TN)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TN", x="TN (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 66 rows containing non-finite values (stat_bin).
## Warning: Removed 66 rows containing non-finite values (stat_density).

Open Water TN:TP

ggplot(OWCSLAP, aes(x=TN_TP)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TN:TP", x="TN:TP", y="Frequency")+
  scale_x_continuous(limits=c(0,100))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 180 rows containing non-finite values (stat_bin).
## Warning: Removed 180 rows containing non-finite values (stat_density).
## Warning: Removed 4 rows containing missing values (geom_bar).

Open Water Chlorophyll-a

ggplot(OWCSLAP, aes(x=Extracted_Chl.a)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 114 rows containing non-finite values (stat_bin).
## Warning: Removed 114 rows containing non-finite values (stat_density).

Secchi Depth

ggplot(OWCSLAP, aes(x=Secchi_Depth)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Secchi Depth", x="Secchi depth (m)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 114 rows containing non-finite values (stat_bin).
## Warning: Removed 114 rows containing non-finite values (stat_density).

True Color

ggplot(OWCSLAP, aes(x=True_Color)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="True Color", x="True color (PTU)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 95 rows containing non-finite values (stat_bin).
## Warning: Removed 95 rows containing non-finite values (stat_density).

Shoreline Bloom Chlorophyll

ggplot(SBCSLAP, aes(x=ESF_FP_Chl.a)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Shoreline Bloom Chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing non-finite values (stat_density).

Shoreline Bloom Microcystin

ggplot(SBCSLAP, aes(x=ESF_Microcystin)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Shoreline Bloom Microcystin", x="Microcystin (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 180 rows containing non-finite values (stat_bin).
## Warning: Removed 180 rows containing non-finite values (stat_density).

Mean Depth

#Quickly change factor level labels for Dreissenids
Lake_List[Lake_List$Dreissenids == 0, 5] <- rep("Uninvaded", 60)
Lake_List[Lake_List$Dreissenids == 1, 5] <- rep("Invaded", 8)
ggplot(Lake_List, aes(x=Mean_Depth_m)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Mean Depth", x="Mean depth (m)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

CA:SA

ggplot(Lake_List, aes(x=CA.SA)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Catchment Area to Surface Area Ratio", x="CA:SA", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Percent Agriculture

ggplot(Lake_List, aes(x=Percent_Ag)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Percent Agricultural Cover in Watershed", x="Agriculture (%)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 1 rows containing non-finite values (stat_density).

Reduced Dataset

Data Summaries

#melt data frame by Dreissenids and Info_type so we can summarize all our variables of interest in one code chunk 
redCSLAP2<-redCSLAP[, c(8, 13, 17, 21, 22, 28, 32, 36, 37, 38, 50:54)]
redmeltCSLAP <- melt(redCSLAP2, id=c("Dreissenids", "Info_Type"), na.rm=TRUE)

#summarize each variable split by invasion status and info
redVariableSummaries<-ddply(meltCSLAP, c("Dreissenids", "Info_Type", "variable"), summarize,
      Mean = mean(value, na.rm=TRUE), 
      Median = median(value, na.rm=TRUE), 
      Min = min(value, na.rm=TRUE), 
      Max = max(value, na.rm=TRUE), 
      SD = sd(value, na.rm=TRUE))
redVariableSummaries
##    Dreissenids Info_Type        variable         Mean     Median        Min
## 1      Invaded        BS              TP 3.945294e-02   0.019850  0.0000000
## 2      Invaded        BS      Temp_Water 1.511196e+01  15.000000  5.0000000
## 3      Invaded        BS           CA.SA 2.788857e+01   6.507353  4.4873502
## 4      Invaded        BS    Mean_Depth_m 6.232740e+00   6.200000  3.7000000
## 5      Invaded        BS      Percent_Ag 2.046619e+01  20.000000  1.0000000
## 6      Invaded        OW    Secchi_Depth 4.536559e+00   4.400000  1.1000000
## 7      Invaded        OW              TP 1.276286e-02   0.011000  0.0036000
## 8      Invaded        OW      Temp_Water 2.330388e+01  24.000000 10.0000000
## 9      Invaded        OW              TN 5.232666e-01   0.368810  0.0000000
## 10     Invaded        OW      True_Color 1.295292e+01  11.000000  0.5000000
## 11     Invaded        OW Extracted_Chl.a 2.372222e+00   1.900000  0.0500000
## 12     Invaded        OW    ESF_FP_Chl.a 1.945223e+00   1.600000  0.0000000
## 13     Invaded        OW      ESF_FP_BGA 4.813312e-01   0.100000  0.0000000
## 14     Invaded        OW ESF_Microcystin 6.437536e-01   0.440000  0.3285191
## 15     Invaded        OW           TN_TP 4.676542e+01  31.182701  0.0000000
## 16     Invaded        OW           CA.SA 2.823056e+01  10.780370  4.4873502
## 17     Invaded        OW    Mean_Depth_m 6.233231e+00   6.200000  3.7000000
## 18     Invaded        OW      Percent_Ag 1.934462e+01  20.000000  1.0000000
## 19     Invaded        SB    Secchi_Depth 2.750000e+00   2.750000  2.7500000
## 20     Invaded        SB    ESF_FP_Chl.a 2.138201e+03 142.635000  0.2000000
## 21     Invaded        SB      ESF_FP_BGA 2.002831e+03  85.475000  0.0000000
## 22     Invaded        SB ESF_Microcystin 7.493377e+01  43.840108  0.8346926
## 23     Invaded        SB           CA.SA 1.424389e+01   4.807692  4.6535893
## 24     Invaded        SB    Mean_Depth_m 5.713333e+00   7.100000  3.7000000
## 25     Invaded        SB      Percent_Ag 3.256667e+01  21.500000 20.0000000
## 26   Uninvaded        BS    Secchi_Depth 5.383333e+00   5.350000  5.1000000
## 27   Uninvaded        BS              TP 5.579272e-02   0.015000  0.0000000
## 28   Uninvaded        BS      Temp_Water 1.347582e+01  13.000000  3.0000000
## 29   Uninvaded        BS              TN 3.655000e-01   0.365500  0.3580000
## 30   Uninvaded        BS      True_Color 1.300000e+01  13.000000  9.0000000
## 31   Uninvaded        BS Extracted_Chl.a 1.600000e+00   1.600000  1.6000000
## 32   Uninvaded        BS    ESF_FP_Chl.a 5.300000e-01   0.570000  0.3900000
## 33   Uninvaded        BS      ESF_FP_BGA 0.000000e+00   0.000000  0.0000000
## 34   Uninvaded        BS           TN_TP 1.234775e+01  12.347749  8.1978022
## 35   Uninvaded        BS           CA.SA 1.816096e+01   7.732739  0.7403433
## 36   Uninvaded        BS    Mean_Depth_m 7.083978e+00   5.500000  2.6000000
## 37   Uninvaded        BS      Percent_Ag 8.507919e+00   1.000000  0.0000000
## 38   Uninvaded        OW    Secchi_Depth 4.075063e+00   3.800000  1.0000000
## 39   Uninvaded        OW              TP 1.207152e-02   0.009700  0.0000000
## 40   Uninvaded        OW      Temp_Water 2.271985e+01  23.000000  7.0000000
## 41   Uninvaded        OW              TN 3.831279e-01   0.325000  0.0000000
## 42   Uninvaded        OW      True_Color 1.578847e+01  13.000000  0.5000000
## 43   Uninvaded        OW Extracted_Chl.a 3.377997e+00   2.400000  0.0500000
## 44   Uninvaded        OW    ESF_FP_Chl.a 1.007138e+01   1.700000  0.0000000
## 45   Uninvaded        OW      ESF_FP_BGA 2.181487e+00   0.090000  0.0000000
## 46   Uninvaded        OW ESF_Microcystin 1.188658e+00   0.407566  0.1600000
## 47   Uninvaded        OW           TN_TP 4.209054e+01  32.444444  1.0840999
## 48   Uninvaded        OW           CA.SA 1.883358e+01   8.945022  0.7403433
## 49   Uninvaded        OW    Mean_Depth_m 5.966943e+00   4.700000  0.4000000
## 50   Uninvaded        OW      Percent_Ag 6.657864e+00   1.000000  0.0000000
## 51   Uninvaded        RT           CA.SA 8.140788e+01  81.407877 81.4078774
## 52   Uninvaded        RT    Mean_Depth_m 1.700000e+01  17.000000 17.0000000
## 53   Uninvaded        RT      Percent_Ag 1.000000e+00   1.000000  1.0000000
## 54   Uninvaded        SB    Secchi_Depth 3.600000e+00   2.500000  1.7000000
## 55   Uninvaded        SB    ESF_FP_Chl.a 2.791198e+03 171.710000  0.2800000
## 56   Uninvaded        SB      ESF_FP_BGA 2.309770e+03  26.870000  0.0000000
## 57   Uninvaded        SB ESF_Microcystin 2.086218e+03 100.355000  0.8100000
## 58   Uninvaded        SB           CA.SA 9.269072e+00   6.057692  0.7403433
## 59   Uninvaded        SB    Mean_Depth_m 5.083735e+00   4.600000  0.4000000
## 60   Uninvaded        SB      Percent_Ag 9.583851e+00   4.000000  0.0000000
##            Max           SD
## 1      0.37420 5.324365e-02
## 2     25.00000 5.298994e+00
## 3    189.85507 5.436477e+01
## 4      9.90000 1.727590e+00
## 5     48.00000 1.550414e+01
## 6      9.10000 1.395980e+00
## 7      0.06640 7.093941e-03
## 8     36.00000 2.860583e+00
## 9      4.12400 4.399775e-01
## 10    55.00000 9.245752e+00
## 11    13.30000 1.839573e+00
## 12    12.15000 1.634573e+00
## 13    10.36000 9.835983e-01
## 14     1.84000 5.345261e-01
## 15   240.59524 4.110511e+01
## 16   189.85507 5.391221e+01
## 17     9.90000 1.697457e+00
## 18    48.00000 1.529962e+01
## 19     2.75000           NA
## 20 31507.50000 6.226951e+03
## 21 31507.50000 6.221888e+03
## 22   361.24104 9.244751e+01
## 23   189.85507 4.064373e+01
## 24     9.90000 1.901513e+00
## 25    48.00000 1.359881e+01
## 26     5.70000 3.013857e-01
## 27     3.90960 1.657730e-01
## 28    29.00000 5.807561e+00
## 29     0.37300 1.060660e-02
## 30    17.00000 4.000000e+00
## 31     1.60000           NA
## 32     0.63000 1.249000e-01
## 33     0.00000 0.000000e+00
## 34    16.49770 5.868911e+00
## 35   210.42471 3.484989e+01
## 36    21.30000 4.142202e+00
## 37    44.00000 1.271529e+01
## 38    13.60000 1.697343e+00
## 39     0.21590 1.094758e-02
## 40    34.00000 3.428063e+00
## 41    12.93200 4.134485e-01
## 42   100.00000 1.073302e+01
## 43    63.20000 3.682886e+00
## 44  3438.50000 1.476402e+02
## 45  2564.00000 5.420867e+01
## 46    52.86975 5.577302e+00
## 47  7936.00000 1.705462e+02
## 48   210.42471 3.385397e+01
## 49    21.30000 4.112813e+00
## 50    44.00000 1.106120e+01
## 51    81.40788           NA
## 52    17.00000           NA
## 53     1.00000           NA
## 54     6.60000 2.628688e+00
## 55 74781.25000 9.683792e+03
## 56 74781.25000 9.173332e+03
## 57 39757.12502 7.757750e+03
## 58    54.94505 7.784528e+00
## 59    13.00000 2.380611e+00
## 60    41.00000 1.102359e+01
#Write this dataframe to a csv so the values can be easily put into a word document 
write.csv(redVariableSummaries, "./output_data/redVariableSummaries-June2020.csv")

Histograms

First we will split our dataset based on information type

redOWCSLAP<-redCSLAP[redCSLAP$Info_Type == "OW",]
redBSCSLAP<-redCSLAP[redCSLAP$Info_Type == "BS",]
redSBCSLAP<-redCSLAP[redCSLAP$Info_Type == "SB",]

Open Water TP

ggplot(redOWCSLAP, aes(x=TP)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TP", x="TP (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 13 rows containing non-finite values (stat_bin).
## Warning: Removed 13 rows containing non-finite values (stat_density).

Open Water TN

ggplot(redOWCSLAP, aes(x=TN)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TN", x="TN (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 20 rows containing non-finite values (stat_bin).
## Warning: Removed 20 rows containing non-finite values (stat_density).

Open Water TN:TP

ggplot(redOWCSLAP, aes(x=TN_TP)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TN:TP", x="TN:TP", y="Frequency")+
  scale_x_continuous(limits=c(0,100))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 67 rows containing non-finite values (stat_bin).
## Warning: Removed 67 rows containing non-finite values (stat_density).
## Warning: Removed 4 rows containing missing values (geom_bar).

Open Water Chlorophyll-a

ggplot(redOWCSLAP, aes(x=Extracted_Chl.a)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 24 rows containing non-finite values (stat_bin).
## Warning: Removed 24 rows containing non-finite values (stat_density).

Secchi Depth

ggplot(redOWCSLAP, aes(x=Secchi_Depth)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Secchi Depth", x="Secchi depth (m)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 30 rows containing non-finite values (stat_bin).
## Warning: Removed 30 rows containing non-finite values (stat_density).

True Color

ggplot(redOWCSLAP, aes(x=True_Color)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="True Color", x="True color (PTU)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 21 rows containing non-finite values (stat_bin).
## Warning: Removed 21 rows containing non-finite values (stat_density).

Shoreline Bloom Chlorophyll

ggplot(redSBCSLAP, aes(x=ESF_FP_Chl.a)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Shoreline Bloom Chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Shoreline Bloom Microcystin

ggplot(redSBCSLAP, aes(x=ESF_Microcystin)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Shoreline Bloom Microcystin", x="Microcystin (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 55 rows containing non-finite values (stat_bin).
## Warning: Removed 55 rows containing non-finite values (stat_density).
## Warning: Groups with fewer than two data points have been dropped.

Historical Analyses for Invaded Lakes

Secchi Depth

Because of the way the data were given to me, Secchi depth has to be done first on its own

hisCSLAP<-read.csv("./historical_data/Historical_CSLAP.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))

#extract sample year and month 
hisCSLAP$Sample_Year<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%Y"))
hisCSLAP$Sample_Month<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%m"))

#read in invasion year information 
Invasion_Year<-read.csv("./raw_data/InvasionYear.csv", header=TRUE, colClasses=c("factor", "numeric"))

#merge with hisCSLAP
hisCSLAP<-merge(hisCSLAP, Invasion_Year, all.x=TRUE)

#create new column `Years_Invaded` by subtracting `Invasion Year` from `Sample Year`
hisCSLAP$Years_Invaded<-hisCSLAP$Sample_Year-hisCSLAP$Invasion_Year

#turn `Sample_Year`, `Sample_Month`, and `Invasion_Year` back to factors
hisCSLAP$Sample_Year<-as.factor(hisCSLAP$Sample_Year)
hisCSLAP$Sample_Month<-as.factor(hisCSLAP$Sample_Month)
hisCSLAP$Invasion_Year<-as.factor(hisCSLAP$Invasion_Year)

#create column with 'Invaded' or "Uninvaded" based on `Years Invaded`
hisCSLAP$Dreissenids<-ifelse(hisCSLAP$Years_Invaded >= 0, "Invaded", "Uninvaded")
hisCSLAP$Dreissenids<-as.factor(hisCSLAP$Dreissenids)
#Aggregating yearly means by lake and years invaded
Secchi<-aggregate(Secchi_Depth_m ~ Lake_Name + Years_Invaded, data=hisCSLAP, FUN=mean)

Chl<-aggregate(Extracted_Chl_ug.L ~ Lake_Name + Years_Invaded, data=hisCSLAP, FUN=mean)
#t-test pre and post invasion and MEMs
hist(hisCSLAP$Secchi_Depth_m)

t.test(Secchi_Depth_m~Dreissenids, data=hisCSLAP)
## 
##  Welch Two Sample t-test
## 
## data:  Secchi_Depth_m by Dreissenids
## t = 10.37, df = 1054.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.6735570 0.9879379
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##                4.672985                3.842238
Secchi.lmer<-lmer(Secchi_Depth_m ~ Dreissenids + (1|LakeID), data=hisCSLAP)

summary(Secchi.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Secchi_Depth_m ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP
## 
## REML criterion at convergence: 4388.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9324 -0.6854 -0.0512  0.6419  3.5186 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.4754   0.6895  
##  Residual             1.5707   1.2533  
## Number of obs: 1324, groups:  LakeID, 8
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)             4.62638    0.25053    7.48842  18.466 1.62e-07 ***
## DreissenidsUninvaded   -0.70457    0.07522 1320.02303  -9.367  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.175
ggplot(hisCSLAP, aes(x=Dreissenids, y=Secchi_Depth_m))+
  geom_boxplot()+
  theme_minimal()+
  labs(y = "Secchi Depth (m)")
## Warning: Removed 779 rows containing non-finite values (stat_boxplot).

#ggsave("historical_Secchi.png")

Data Read In For the Rest of the Variables

#historicalCSLAP df edited to match DeRuyter and EatonBrook dfs
hisCSLAP<-read.csv("./historical_data/Historical_CSLAP_edited.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))

#DeRuyter and EatonBrook dfs
Eaton<-read.csv("./historical_data/EatonHistoricalClean2.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))
Der<-read.csv("./historical_data/DeruyterHistoricalClean2.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))

#change levels of Info_Type 
hisCSLAP$Info_Type[hisCSLAP$Info_Type == "ow"]<-"OW"
hisCSLAP$Info_Type[hisCSLAP$Info_Type == "sb"]<-"SB"
hisCSLAP$Info_Type<-as.factor(hisCSLAP$Info_Type)
hisCSLAP$Info_Type<-droplevels(hisCSLAP$Info_Type)
#Merge hisCSLAP with Deruyter and Eatonbrook 

#remove any observations from Eaton or DeRuyter from hisCSLAP 
hisCSLAP<-hisCSLAP[hisCSLAP$Lake_Name != "De Ruyter Reservoir",]
hisCSLAP<-hisCSLAP[hisCSLAP$Lake_Name != "Eaton Brook Reservoir",]

#row bind cleaned DeRuyter and EatonBrook 
hisCSLAP<-rbind(hisCSLAP, Eaton)
hisCSLAP<-rbind(hisCSLAP, Der)

Further organize hisCSLAP, and merge physical characteristics and invasion status

#extract sample year and month 
hisCSLAP$Sample_Year<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%Y"))
hisCSLAP$Sample_Month<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%m"))

#merge Invasion Year with hisCSLAP
hisCSLAP<-merge(hisCSLAP, Invasion_Year, all.x=TRUE)

#create new column `Years_Invaded` by subtracting `Invasion Year` from `Sample Year`
hisCSLAP$Years_Invaded<-hisCSLAP$Sample_Year-hisCSLAP$Invasion_Year

#turn `Sample_Year`, `Sample_Month`, and `Invasion_Year` back to factors
hisCSLAP$Sample_Year<-as.factor(hisCSLAP$Sample_Year)
hisCSLAP$Sample_Month<-as.factor(hisCSLAP$Sample_Month)
hisCSLAP$Invasion_Year<-as.factor(hisCSLAP$Invasion_Year)

#create column with 'Invaded' or "Uninvaded" based on `Years Invaded`
hisCSLAP$Dreissenids<-ifelse(hisCSLAP$Years_Invaded >= 0, "Invaded", "Uninvaded")
hisCSLAP$Dreissenids<-as.factor(hisCSLAP$Dreissenids)

#we will also remove and shoreline bloom samples 
hisCSLAP<-hisCSLAP[hisCSLAP$Info_Type != "SB",]
hisCSLAP$Info_Type<-droplevels(hisCSLAP$Info_Type)

#create a TN:TP column 
hisCSLAP$TN_TP<-hisCSLAP$TN_mg.L/hisCSLAP$TP_mg.L

#finally, create a df with only OW samples and a df with only BS samples 
OWhisCSLAP<-hisCSLAP[hisCSLAP$Info_Type == "OW",]
BShisCSLAP<-hisCSLAP[hisCSLAP$Info_Type == "BS",]

Aggregating yearly means by lake and years invaded

Chl<-aggregate(Extracted_Chl_ug.L ~ Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

OWTP<-aggregate(TP_mg.L ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

BSTP<-aggregate(TP_mg.L ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "BS",], FUN=mean)

OWTN<-aggregate(TN_mg.L ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

OWTN_TP<-aggregate(TN_TP ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

TC<-aggregate(True_Color_PTU ~ Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

Calc<-aggregate(Calcium_mg.L ~ Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

How do lakes change after invasion? (t-test for pre and post invasion, then MEMs)

Extracted Chlorophyll-a

hist(log(OWhisCSLAP$Extracted_Chl_ug.L))

t.test(Extracted_Chl_ug.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  Extracted_Chl_ug.L by Dreissenids
## t = -12.443, df = 1320.3, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.624423 -1.909601
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##                2.492524                4.759536
Chl.lmer<-lmer(log(Extracted_Chl_ug.L) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(Chl.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Extracted_Chl_ug.L) ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 3050.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0999 -0.5200 -0.0071  0.5971  4.1661 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.04857  0.2204  
##  Residual             0.57543  0.7586  
## Number of obs: 1323, groups:  LakeID, 8
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)          6.034e-01  8.519e-02 7.636e+00   7.083  0.00013 ***
## DreissenidsUninvaded 6.015e-01  4.518e-02 1.318e+03  13.312  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.306
plot(Chl.lmer)

Open Water TP

hist(log(OWhisCSLAP$TP_mg.L))

t.test(TP_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  TP_mg.L by Dreissenids
## t = 3.1251, df = 760.3, p-value = 0.001845
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0003861491 0.0016908695
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##              0.01197581              0.01093730
OWTP.lmer<-lmer(log(TP_mg.L) ~ Dreissenids  + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(OWTP.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TP_mg.L) ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 868.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0407 -0.6192 -0.0512  0.5392  6.2139 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.03243  0.1801  
##  Residual             0.11009  0.3318  
## Number of obs: 1315, groups:  LakeID, 8
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)            -4.48194    0.06539    7.42433 -68.539 1.13e-11 ***
## DreissenidsUninvaded   -0.11493    0.01972 1310.85322  -5.827 7.10e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.171
plot(OWTP.lmer)

Bottom Sample TP

hist(log(BShisCSLAP$TP_mg.L))

t.test(TP_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "BS",])
## 
##  Welch Two Sample t-test
## 
## data:  TP_mg.L by Dreissenids
## t = -2.6076, df = 243.03, p-value = 0.009682
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08319781 -0.01159341
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##              0.04436469              0.09176031
BSTP.lmer<-lmer(log(TP_mg.L) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "BS",])

summary(BSTP.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TP_mg.L) ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "BS", ]
## 
## REML criterion at convergence: 1778.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4685 -0.5772 -0.1504  0.5627  4.1494 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.4817   0.6941  
##  Residual             0.8566   0.9255  
## Number of obs: 651, groups:  LakeID, 8
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)           -3.78692    0.24993   7.12997 -15.152  1.1e-06 ***
## DreissenidsUninvaded   0.33838    0.08942 647.96512   3.784 0.000169 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.106
plot(BSTP.lmer)

Open Water TN

hist(log(OWhisCSLAP$TN_mg.L))

t.test(TN_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  TN_mg.L by Dreissenids
## t = -0.82352, df = 240.57, p-value = 0.411
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.12748917  0.05231878
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##               0.4895265               0.5271117
OWTN.lmer<-lmer(log(TN_mg.L) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(OWTN.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN_mg.L) ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 880.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.3884 -0.5927 -0.0039  0.5499  5.6278 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.2847   0.5336  
##  Residual             0.2084   0.4565  
## Number of obs: 661, groups:  LakeID, 8
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)   
## (Intercept)           -0.855286   0.189889   7.030046  -4.504  0.00275 **
## DreissenidsUninvaded   0.004495   0.048195 655.931431   0.093  0.92572   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.060

Open Water TN:TP

hist(log(OWhisCSLAP$TN_TP))

t.test(TN_TP ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  TN_TP by Dreissenids
## t = -0.65903, df = 251.56, p-value = 0.5105
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -10.553469   5.261396
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##                45.67168                48.31771
OWTN_TP.lmer<-lmer(log(TN_TP) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(OWTN_TP.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN_TP) ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 1073.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2261 -0.5569 -0.0289  0.5560  4.2435 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.2491   0.4991  
##  Residual             0.2877   0.5364  
## Number of obs: 651, groups:  LakeID, 8
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            3.62840    0.17831   7.05445   20.35 1.59e-07 ***
## DreissenidsUninvaded   0.06740    0.05665 647.39946    1.19    0.235    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.075

Open Water True Color

t.test(True_Color_PTU ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  True_Color_PTU by Dreissenids
## t = 11.754, df = 1008.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  5.747462 8.051237
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##               14.263158                7.363808
TC.lmer<-lmer(True_Color_PTU ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(TC.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: True_Color_PTU ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 9688.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3027 -0.5143 -0.1901  0.2417  7.1496 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 44.38    6.662   
##  Residual             85.84    9.265   
## Number of obs: 1325, groups:  LakeID, 8
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            15.1934     2.3925    7.0424   6.351 0.000376 ***
## DreissenidsUninvaded   -5.8055     0.5509 1319.0393 -10.537  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.132

Calcium

t.test(Calcium_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  Calcium_mg.L by Dreissenids
## t = -0.034601, df = 64.648, p-value = 0.9725
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.422633  4.272010
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##                30.53299                30.60830
Calc.lmer<-lmer(Calcium_mg.L ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(Calc.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Calcium_mg.L ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 858.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7693 -0.3958 -0.0114  0.3219  3.5680 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 88.02    9.382   
##  Residual             43.69    6.610   
## Number of obs: 127, groups:  LakeID, 8
## 
## Fixed effects:
##                      Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)            31.031      3.414   7.283   9.090 3.13e-05 ***
## DreissenidsUninvaded    2.383      1.637 122.621   1.455    0.148    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.149

Boxplots

Chlorophyll-a

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=Extracted_Chl_ug.L)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Extracted Chlorophyll-a (mg/L)")
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_Chl.png")

OWTP

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=TP_mg.L)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Open Water TP (mg/L)")
## Warning: Removed 62 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_OWTP.png")

BSTP

ggplot(hisCSLAP[hisCSLAP$Info_Type == "BS",], aes(x=Dreissenids, y=TP_mg.L)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Bottom Sample TP (mg/L)")
## Warning: Removed 228 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_BSTP.png")

OWTN

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=TN_mg.L)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Open Water TN (mg/L)")
## Warning: Removed 716 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_OWTN.png")

OWTN_TP

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=TN_TP)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Open Water TN:TP")
## Warning: Removed 726 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_OWTN_TP.png")

OW TC

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=True_Color_PTU)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Open Water True Color")
## Warning: Removed 52 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_OWTC.png")

OW Calcium

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=Calcium_mg.L)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Open Water Calcium")
## Warning: Removed 1250 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_OWCalcium.png")

Mixed Effects Models

Global Dataset

Open water TN:TP

TNTP<-lmer(log(TN_TP+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)

summary(TNTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN_TP + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |  
##     LakeID) + (1 | Sample_Year:LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: 4288.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -14.8325  -0.4551   0.0402   0.4806   9.2136 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.052851 0.22989 
##  LakeID             (Intercept) 0.058346 0.24155 
##  Sample_Month       (Intercept) 0.001713 0.04139 
##  Sample_Year        (Intercept) 0.009578 0.09787 
##  Residual                       0.255404 0.50538 
## Number of obs: 2601, groups:  
## Sample_Year:LakeID, 342; LakeID, 67; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)           3.183775   0.165460 79.439722  19.242   <2e-16 ***
## DreissenidsUninvaded -0.032480   0.105180 92.981532  -0.309   0.7582    
## log(CA.SA)            0.048601   0.033665 61.368921   1.444   0.1539    
## log(Mean_Depth_m)     0.125403   0.052248 63.715137   2.400   0.0193 *  
## Percent_Ag            0.001651   0.002880 66.035094   0.573   0.5683    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.673                     
## log(CA.SA)  -0.458  0.044              
## lg(Mn_Dpt_) -0.520  0.087 -0.040       
## Percent_Ag  -0.279  0.326 -0.044 -0.053
plot(TNTP)

qqPlot(resid(TNTP))

## 1395 4458 
##  711 2353

Bottom Sample TP

BSTP<-lmer(log(TP+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=BSCSLAP)

summary(BSTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TP + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |  
##     LakeID) + (1 | Sample_Year:LakeID)
##    Data: BSCSLAP
## 
## REML criterion at convergence: 3060.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.4588 -0.4356 -0.0638  0.3495  4.9671 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.081919 0.28621 
##  LakeID             (Intercept) 0.387229 0.62228 
##  Sample_Month       (Intercept) 0.007195 0.08482 
##  Sample_Year        (Intercept) 0.003081 0.05551 
##  Residual                       0.243055 0.49301 
## Number of obs: 1808, groups:  
## Sample_Year:LakeID, 269; LakeID, 53; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)           -3.550746   0.448242  60.210119  -7.921 6.52e-11 ***
## DreissenidsUninvaded   0.246827   0.206337 134.468860   1.196  0.23371    
## log(CA.SA)            -0.013752   0.090666  48.401520  -0.152  0.88007    
## log(Mean_Depth_m)     -0.145212   0.203496  48.540796  -0.714  0.47890    
## Percent_Ag             0.022462   0.006911  50.849848   3.250  0.00205 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.478                     
## log(CA.SA)  -0.244  0.063              
## lg(Mn_Dpt_) -0.740  0.022 -0.252       
## Percent_Ag  -0.346  0.256 -0.116  0.177
plot(BSTP)

qqPlot(resid(BSTP))

## 3945  946 
## 1445  355

Open water TN

Full model error: Model failed to converge

-Remove Sample_Month. Sample_Year, Sample_Year:LakeID (low variance) to resolve error

TN<-lmer(log(TN+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +   (1|LakeID), data=OWCSLAP)

summary(TN)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: 2964
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.3622 -0.5006  0.0011  0.4902  8.9046 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.06457  0.2541  
##  Residual             0.16644  0.4080  
## Number of obs: 2638, groups:  LakeID, 67
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)           -0.977263   0.140314  91.572863  -6.965 4.89e-10 ***
## DreissenidsUninvaded  -0.075003   0.083028 198.446052  -0.903  0.36744    
## log(CA.SA)             0.047215   0.032257  62.385805   1.464  0.14828    
## log(Mean_Depth_m)     -0.137653   0.049696  63.297223  -2.770  0.00735 ** 
## Percent_Ag             0.012407   0.002696  67.534806   4.602 1.90e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.628                     
## log(CA.SA)  -0.512  0.042              
## lg(Mn_Dpt_) -0.568  0.071 -0.034       
## Percent_Ag  -0.238  0.276 -0.053 -0.063
plot(TN)

qqPlot(resid(TN))

## 3666 1395 
## 1966  711

Open water chlorophyll

Chl<-lmer(log(Extracted_Chl.a+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)

summary(Chl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(Extracted_Chl.a + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |  
##     LakeID) + (1 | Sample_Year:LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: 5949.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.5567 -0.4517  0.0417  0.5127  4.1829 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.05400  0.2324  
##  LakeID             (Intercept) 0.21540  0.4641  
##  Sample_Month       (Intercept) 0.02576  0.1605  
##  Sample_Year        (Intercept) 0.01314  0.1146  
##  Residual                       0.49917  0.7065  
## Number of obs: 2591, groups:  
## Sample_Year:LakeID, 342; LakeID, 67; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)            1.355434   0.282568  85.411626   4.797 6.77e-06 ***
## DreissenidsUninvaded  -0.146767   0.167536 127.257977  -0.876  0.38266    
## log(CA.SA)             0.037582   0.060147  56.206917   0.625  0.53461    
## log(Mean_Depth_m)     -0.295017   0.092811  57.298075  -3.179  0.00239 ** 
## Percent_Ag             0.002679   0.005059  60.535425   0.529  0.59842    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.628                     
## log(CA.SA)  -0.476  0.042              
## lg(Mn_Dpt_) -0.533  0.078 -0.033       
## Percent_Ag  -0.244  0.294 -0.052 -0.061
plot(Chl)

qqPlot(resid(Chl))

## 1989 3530 
## 1003 1856

Secchi

Secchi<-lmer(log(Secchi_Depth+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)

summary(Secchi)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(Secchi_Depth + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |  
##     LakeID) + (1 | Sample_Year:LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: -311.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.9201 -0.5351  0.0207  0.5662  4.7949 
## 
## Random effects:
##  Groups             Name        Variance  Std.Dev.
##  Sample_Year:LakeID (Intercept) 1.150e-02 0.107227
##  LakeID             (Intercept) 7.080e-02 0.266084
##  Sample_Month       (Intercept) 4.551e-05 0.006746
##  Sample_Year        (Intercept) 8.639e-04 0.029393
##  Residual                       4.079e-02 0.201956
## Number of obs: 2590, groups:  
## Sample_Year:LakeID, 342; LakeID, 67; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)            1.063472   0.140820  95.144934   7.552 2.60e-11 ***
## DreissenidsUninvaded  -0.091870   0.078020 230.031893  -1.178   0.2402    
## log(CA.SA)            -0.071893   0.033512  62.546322  -2.145   0.0358 *  
## log(Mean_Depth_m)      0.336271   0.051584  63.305400   6.519 1.34e-08 ***
## Percent_Ag            -0.002311   0.002784  68.034599  -0.830   0.4093    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.586                     
## log(CA.SA)  -0.523  0.034              
## lg(Mn_Dpt_) -0.579  0.064 -0.033       
## Percent_Ag  -0.212  0.250 -0.059 -0.068
plot(Secchi)

qqPlot(resid(Secchi))

## 4582 4641 
## 2411 2440

Open Water True Color

TC<-lmer(log(True_Color+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)

summary(TC)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(True_Color + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |  
##     LakeID) + (1 | Sample_Year:LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: 2860.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.0242 -0.4258  0.0510  0.5180  3.4438 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.06792  0.2606  
##  LakeID             (Intercept) 0.18192  0.4265  
##  Sample_Month       (Intercept) 0.01635  0.1279  
##  Sample_Year        (Intercept) 0.07901  0.2811  
##  Residual                       0.13060  0.3614  
## Number of obs: 2612, groups:  
## Sample_Year:LakeID, 343; LakeID, 67; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)           2.179e+00  2.738e-01  7.195e+01   7.957 1.85e-11 ***
## DreissenidsUninvaded  3.631e-01  1.480e-01  1.507e+02   2.453  0.01532 *  
## log(CA.SA)            1.552e-01  5.483e-02  6.103e+01   2.830  0.00629 ** 
## log(Mean_Depth_m)    -2.406e-01  8.455e-02  6.207e+01  -2.846  0.00599 ** 
## Percent_Ag           -2.579e-04  4.599e-03  6.587e+01  -0.056  0.95545    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.572                     
## log(CA.SA)  -0.446  0.039              
## lg(Mn_Dpt_) -0.498  0.074 -0.033       
## Percent_Ag  -0.218  0.285 -0.054 -0.063
plot(TC)

qqPlot(resid(TC))

## 3656 4774 
## 1940 2531

Shoreline Bloom Chlorophyll a

Full model error: boundary (singular) fit and model failed to converge

Remove Sample_Year, Sample_Month, and Sample_Year:LakeID (low variance) to resolve

SBChl<-lmer(log(ESF_FP_Chl.a+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag  + (1|LakeID), data=SBCSLAP)

summary(SBChl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(ESF_FP_Chl.a + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | LakeID)
##    Data: SBCSLAP
## 
## REML criterion at convergence: 990.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.96703 -0.56007  0.06596  0.62799  2.26269 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 2.293    1.514   
##  Residual             4.428    2.104   
## Number of obs: 219, groups:  LakeID, 42
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)   
## (Intercept)           5.12907    1.56564 35.94401   3.276  0.00234 **
## DreissenidsUninvaded -0.21254    0.98272 46.03089  -0.216  0.82973   
## log(CA.SA)            0.26531    0.33747 35.26516   0.786  0.43701   
## log(Mean_Depth_m)    -0.42431    0.50696 28.67188  -0.837  0.40953   
## Percent_Ag            0.03699    0.02638 35.94149   1.402  0.16953   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.710                     
## log(CA.SA)  -0.503  0.070              
## lg(Mn_Dpt_) -0.544  0.071  0.058       
## Percent_Ag  -0.392  0.501 -0.077 -0.090
plot(SBChl)

qqPlot(resid(SBChl))

##  614 4310 
##   44  195

Shoreline Bloom Microcystin

We want to add TN:TP as a predictor (fixed effect), but these variables aren’t collected with shoreline bloom data. So we will instead use the yearly average for a lake.

#Extracting average annual TN_TP for each lake 
library(plyr)
avgTNTP<-ddply(OWCSLAP, c("LakeID", "Sample_Year"), summarize, 
             Mean = mean(TN_TP, na.rm=TRUE))
colnames(avgTNTP)[colnames(avgTNTP)=="Mean"] <- "TN_TP"

#Merge these values to the SBCSLAP df 
SBCSLAP<-SBCSLAP[,c(1:49, 51:54)]
SBCSLAP<-merge(SBCSLAP, avgTNTP, by=c("LakeID", "Sample_Year"), all.x=TRUE, all.y=FALSE)

Full model error: boundary (singular) fit

Remove Sample_Month and Sample_Year:LakeID to resolve

SBmicro<-lmer(log(ESF_Microcystin) ~ Dreissenids + TN_TP + log(CA.SA) + Mean_Depth_m + Percent_Ag  + (1|Sample_Year) + (1|LakeID), data=SBCSLAP)

summary(SBmicro)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(ESF_Microcystin) ~ Dreissenids + TN_TP + log(CA.SA) + Mean_Depth_m +  
##     Percent_Ag + (1 | Sample_Year) + (1 | LakeID)
##    Data: SBCSLAP
## 
## REML criterion at convergence: 176.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.28513 -0.53162  0.06219  0.53296  1.59305 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  LakeID      (Intercept) 1.846    1.359   
##  Sample_Year (Intercept) 5.793    2.407   
##  Residual                2.281    1.510   
## Number of obs: 43, groups:  LakeID, 7; Sample_Year, 6
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)          16.56817    6.64246  1.83807   2.494    0.141
## DreissenidsUninvaded -1.70297    2.24234  0.83298  -0.759    0.606
## TN_TP                -0.02691    0.02376  5.71424  -1.133    0.303
## log(CA.SA)           -3.02465    2.17918  1.75354  -1.388    0.315
## Mean_Depth_m         -0.66268    0.44983  1.60063  -1.473    0.307
## Percent_Ag           -0.06315    0.06490  1.31865  -0.973    0.475
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU TN_TP  l(CA.S Mn_Dp_
## DrssndsUnnv -0.285                            
## TN_TP       -0.031 -0.049                     
## log(CA.SA)  -0.850 -0.154 -0.040              
## Mean_Dpth_m -0.745  0.169 -0.283  0.583       
## Percent_Ag  -0.694  0.596 -0.245  0.415  0.506
plot(SBmicro)

qqPlot(resid(SBmicro))

## 60 36 
## 23 16

Reduced Dataset

Open water TN:TP

redTNTP<-lmer(log(TN_TP+.01) ~ Dreissenids +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=redOWCSLAP)

summary(redTNTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(TN_TP + 0.01) ~ Dreissenids + (1 | Sample_Year) + (1 | Sample_Month) +  
##     (1 | LakeID) + (1 | Sample_Year:LakeID)
##    Data: redOWCSLAP
## 
## REML criterion at convergence: 1053
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -14.6301  -0.3793   0.0670   0.4569   4.2824 
## 
## Random effects:
##  Groups             Name        Variance  Std.Dev.
##  Sample_Year:LakeID (Intercept) 5.119e-02 0.226251
##  LakeID             (Intercept) 1.933e-01 0.439666
##  Sample_Month       (Intercept) 4.381e-05 0.006619
##  Sample_Year        (Intercept) 3.952e-03 0.062861
##  Residual                       2.563e-01 0.506224
## Number of obs: 631, groups:  
## Sample_Year:LakeID, 82; LakeID, 16; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)           3.57036    0.14745 22.47365   24.21   <2e-16 ***
## DreissenidsUninvaded -0.09378    0.17050 47.13269   -0.55    0.585    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.604
plot(redTNTP)

qqPlot(resid(redTNTP))

## 359 866 
## 152 401

Bottom Sample TP

redBSTP<-lmer(log(TP+.01) ~ Dreissenids  +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=redBSCSLAP)

summary(redBSTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(TP + 0.01) ~ Dreissenids + (1 | Sample_Year) + (1 | Sample_Month) +  
##     (1 | LakeID) + (1 | Sample_Year:LakeID)
##    Data: redBSCSLAP
## 
## REML criterion at convergence: 941.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9133 -0.5193 -0.0992  0.3425  4.2140 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.08718  0.29527 
##  LakeID             (Intercept) 0.19656  0.44335 
##  Sample_Month       (Intercept) 0.01368  0.11695 
##  Sample_Year        (Intercept) 0.00338  0.05813 
##  Residual                       0.23610  0.48590 
## Number of obs: 567, groups:  
## Sample_Year:LakeID, 86; LakeID, 16; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)          -3.41111    0.16546 21.79854 -20.616 8.73e-16 ***
## DreissenidsUninvaded -0.06031    0.19384 32.13280  -0.311    0.758    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.609
plot(redBSTP)

qqPlot(resid(redBSTP))

## 930 870 
## 393 367

Open Water Chlorophyll-a

redChl<-lmer(log(Extracted_Chl.a+1) ~ Dreissenids + (1|LakeID) + (1|Sample_Year) + (1|Sample_Month) + (1|Sample_Year:LakeID), data=redOWCSLAP)

summary(redChl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Extracted_Chl.a + 1) ~ Dreissenids + (1 | LakeID) + (1 |  
##     Sample_Year) + (1 | Sample_Month) + (1 | Sample_Year:LakeID)
##    Data: redOWCSLAP
## 
## REML criterion at convergence: 888.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6791 -0.6123 -0.0104  0.5567  3.7645 
## 
## Random effects:
##  Groups             Name        Variance  Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.0245791 0.1568  
##  LakeID             (Intercept) 0.0623684 0.2497  
##  Sample_Month       (Intercept) 0.0110021 0.1049  
##  Sample_Year        (Intercept) 0.0009305 0.0305  
##  Residual                       0.2031526 0.4507  
## Number of obs: 635, groups:  
## Sample_Year:LakeID, 83; LakeID, 16; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)           1.21908    0.10166 18.65722  11.991 3.29e-10 ***
## DreissenidsUninvaded -0.03321    0.11175 28.88199  -0.297    0.768    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.573
plot(redChl)

qqPlot(resid(redChl))

## 433 161 
## 199  69

Open Water Secchi Depth

-Sample_Year and Sample_Month removed for having low variance

redSecchi <- lmer(log(Secchi_Depth) ~ Dreissenids  + (1|LakeID)+ (1|Sample_Year) + (1|Sample_Month) + (1|Sample_Year:LakeID), data=redOWCSLAP)

summary(redSecchi)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Secchi_Depth) ~ Dreissenids + (1 | LakeID) + (1 | Sample_Year) +  
##     (1 | Sample_Month) + (1 | Sample_Year:LakeID)
##    Data: redOWCSLAP
## 
## REML criterion at convergence: 151
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0631 -0.5199 -0.0037  0.5757  3.9560 
## 
## Random effects:
##  Groups             Name        Variance  Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.0221217 0.14873 
##  LakeID             (Intercept) 0.0470875 0.21700 
##  Sample_Month       (Intercept) 0.0002708 0.01646 
##  Sample_Year        (Intercept) 0.0005295 0.02301 
##  Residual                       0.0582462 0.24134 
## Number of obs: 629, groups:  
## Sample_Year:LakeID, 82; LakeID, 16; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)           1.44876    0.07569 21.04598  19.140 8.61e-15 ***
## DreissenidsUninvaded -0.08699    0.09117 39.48953  -0.954    0.346    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.627
plot(redSecchi)

qqPlot(resid(redSecchi))

##  959 1261 
##  439  595

Open Water True Color

Full model error: Model failed to converge

Remove Sample_Year and Sample_Month (low variance) to resolve

redOWTC<-lmer(True_Color ~ Dreissenids  + (1|LakeID) + + (1|Sample_Year:LakeID), data=redOWCSLAP)

summary(redOWTC)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: True_Color ~ Dreissenids + (1 | LakeID) + +(1 | Sample_Year:LakeID)
##    Data: redOWCSLAP
## 
## REML criterion at convergence: 3965.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2594 -0.4913 -0.0763  0.4095  5.1558 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 49.79    7.056   
##  LakeID             (Intercept) 18.05    4.249   
##  Residual                       19.51    4.417   
## Number of obs: 638, groups:  Sample_Year:LakeID, 83; LakeID, 16
## 
## Fixed effects:
##                      Estimate Std. Error     df t value Pr(>|t|)    
## (Intercept)            12.204      1.860 15.369   6.562 7.98e-06 ***
## DreissenidsUninvaded    2.918      2.497 19.403   1.168    0.257    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.699
plot(redOWTC)

qqPlot(resid(redOWTC))

## 318 311 
## 140 135

Shoreline Bloom Chlorophyll a

-Sample_Year removed for low variance

redSBChl<-lmer(log(ESF_FP_Chl.a) ~ Dreissenids + (1|Sample_Month) + (1|LakeID) , data=redSBCSLAP)

summary(redSBChl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(ESF_FP_Chl.a) ~ Dreissenids + (1 | Sample_Month) + (1 | LakeID)
##    Data: redSBCSLAP
## 
## REML criterion at convergence: 342.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.49435 -0.52241  0.07225  0.49418  1.98859 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  LakeID       (Intercept) 2.057    1.434   
##  Sample_Month (Intercept) 1.822    1.350   
##  Residual                 4.384    2.094   
## Number of obs: 76, groups:  LakeID, 10; Sample_Month, 6
## 
## Fixed effects:
##                      Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)            5.9401     0.9878  7.9946   6.014 0.000319 ***
## DreissenidsUninvaded  -0.2759     1.0549 12.7054  -0.262 0.797879    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.534
plot(redSBChl)

qqPlot(resid(redSBChl))

## 777 319 
##  58  26

Shoreline Bloom Microcystin

Same as for the global dataset, we need to use average yearly TN:TP values

#Extracting average annual TP for each lake 
library(plyr)
redavgTNTP<-ddply(redOWCSLAP, c("LakeID", "Sample_Year"), summarize, 
             Mean = mean(TN_TP, na.rm=TRUE))
colnames(redavgTNTP)[colnames(redavgTNTP)=="Mean"] <- "TN_TP"

#Merge these values to the SBCSLAP df 
redSBCSLAP<-redSBCSLAP[,c(1:49, 51:54)]
redSBCSLAP<-merge(redSBCSLAP, redavgTNTP, by=c("LakeID", "Sample_Year"), all.x=TRUE, all.y=FALSE)

Full model error: failed to converge

Remove Sample_Month, LakeID, and Sample_Year:LakeID to resolve

redSBmicro<-lmer(log(ESF_Microcystin) ~ Dreissenids + log(TN_TP)  + (1|Sample_Year)  , data=redSBCSLAP)

summary(redSBmicro)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(ESF_Microcystin) ~ Dreissenids + log(TN_TP) + (1 | Sample_Year)
##    Data: redSBCSLAP
## 
## REML criterion at convergence: 65.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.05259 -0.49531  0.08973  0.52344  1.73701 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Sample_Year (Intercept) 0.6936   0.8328  
##  Residual                1.8834   1.3724  
## Number of obs: 20, groups:  Sample_Year, 5
## 
## Fixed effects:
##                      Estimate Std. Error      df t value Pr(>|t|)
## (Intercept)            6.4301     3.9041  1.6197   1.647    0.269
## DreissenidsUninvaded   1.8504     1.7803  4.7588   1.039    0.349
## log(TN_TP)            -0.7165     0.8957  1.5288  -0.800    0.529
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU
## DrssndsUnnv -0.343       
## log(TN_TP)  -0.990  0.303
plot(redSBmicro)

qqPlot(resid(redSBmicro))

## 25 19 
## 15 13

HABs Frequency

#Global Dataset

#create column based on 25 ug/L BGA guideline (from NYS DEC)
CSLAP$Bloom <- ifelse(CSLAP$ESF_FP_BGA >=25.0, "Bloom", "No Bloom")
CSLAP$Bloom <- as.factor(CSLAP$Bloom)

#Separate based on invasion status and drop unused levels of Lake Name
I.CSLAP<-CSLAP[CSLAP$Dreissenids == "Invaded",]
I.CSLAP$Lake_Name<-droplevels(I.CSLAP$Lake_Name)

U.CSLAP<-CSLAP[CSLAP$Dreissenids == "Uninvaded",]
U.CSLAP$Lake_Name<-droplevels(U.CSLAP$Lake_Name)

#Reduced Dataset

#create column based on 25 ug/L BGA guideline (from NYS DEC)
redCSLAP$Bloom <- ifelse(redCSLAP$ESF_FP_BGA >=25.0, "Bloom", "No Bloom")
redCSLAP$Bloom <- as.factor(redCSLAP$Bloom)

#Separate based on invasion status and drop unused levels of Lake Name
I.redCSLAP<-redCSLAP[redCSLAP$Dreissenids == "Invaded",]
I.redCSLAP$Lake_Name<-droplevels(I.redCSLAP$Lake_Name)

U.redCSLAP<-redCSLAP[redCSLAP$Dreissenids == "Uninvaded",]
U.redCSLAP$Lake_Name<-droplevels(U.redCSLAP$Lake_Name)

Global Dataset: All Blooms

table(CSLAP[, c("Bloom", "Dreissenids")])
##           Dreissenids
## Bloom      Invaded Uninvaded
##   Bloom         42        98
##   No Bloom     326      2355
HABFreqYear<-as.data.frame(table(CSLAP[,c("Bloom","Dreissenids","Sample_Year")]))

HABFreqLake<-as.data.frame(table(CSLAP[,c("Bloom","Dreissenids","Lake_Name")]))

#separate by invaded and uninvaded
I.blooms<-as.data.frame(table(I.CSLAP[,c("Lake_Name","Bloom")]))
I.blooms$Dreissenids<-rep("Invaded", 16)

U.blooms<-as.data.frame(table(U.CSLAP[,c("Lake_Name","Bloom")]))
U.blooms$Dreissenids<-rep("Uninvaded", 124)

#Make into one dataframe for analysis
blooms<-rbind(I.blooms, U.blooms)

#Remove 'no blooms' 
blooms<-blooms[blooms$Bloom == "Bloom",]

AOV

fit<-aov(Freq ~ Dreissenids, data=blooms)
summary(fit)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Dreissenids  1   95.4   95.40   4.652 0.0346 *
## Residuals   68 1394.6   20.51                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Freq ~ Dreissenids, data = blooms)
## 
## $Dreissenids
##                        diff       lwr        upr    p adj
## Uninvaded-Invaded -3.669355 -7.064224 -0.2744858 0.034559

GLMER poisson

hist(blooms$Freq)

model<-glmer(Freq ~ Dreissenids + (1|Lake_Name), family=poisson(link=log), data=blooms)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name)
##    Data: blooms
## 
##      AIC      BIC   logLik deviance df.resid 
##    238.7    245.5   -116.4    232.7       67 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.5723 -0.4914 -0.4914  0.2667  0.9971 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Lake_Name (Intercept) 2.755    1.66    
## Number of obs: 70, groups:  Lake_Name, 68
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)           -0.2140     0.7002  -0.306     0.76
## DreissenidsUninvaded  -0.5421     0.6722  -0.806     0.42
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.887
res<-simulateResiduals(model)
plot(res)

testResiduals(res)

## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.07856, p-value = 0.7508
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.87134, p-value = 0.832
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 70.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.07856, p-value = 0.7508
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.87134, p-value = 0.832
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 70.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0.97522, p-value = 0.88
## alternative hypothesis: two.sided

Reduced Dataset: All Blooms

redHABFreqYear<-as.data.frame(table(redCSLAP[,c("Bloom","Dreissenids","Sample_Year")]))

##Data organization for analysis 

#separate by invaded and uninvaded
I.redblooms<-as.data.frame(table(I.redCSLAP[,c("Lake_Name","Bloom")]))
I.redblooms$Dreissenids<-rep("Invaded", 16)

U.redblooms<-as.data.frame(table(U.redCSLAP[,c("Lake_Name","Bloom")]))
U.redblooms$Dreissenids<-rep("Uninvaded", 20)

#Make into one dataframe for analysis
redblooms<-rbind(I.redblooms, U.redblooms)

#Remove 'no blooms' 
redblooms<-redblooms[redblooms$Bloom == "Bloom",]

AOV

redfit<-aov(Freq ~ Dreissenids, data=redblooms)
summary(redfit)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Dreissenids  1   80.3   80.28   2.312  0.148
## Residuals   16  555.5   34.72
TukeyHSD(redfit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Freq ~ Dreissenids, data = redblooms)
## 
## $Dreissenids
##                    diff       lwr      upr     p adj
## Uninvaded-Invaded -4.25 -10.17502 1.675019 0.1478745

GLMER poisson

hist(redblooms$Freq)

redmodel<-glmer(Freq ~ Dreissenids + (1|Lake_Name), family=poisson(link=log), data=redblooms)
summary(redmodel)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name)
##    Data: redblooms
## 
##      AIC      BIC   logLik deviance df.resid 
##     75.1     77.8    -34.6     69.1       15 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -0.62869 -0.57154  0.08755  0.23899  0.73619 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Lake_Name (Intercept) 2.376    1.541   
## Number of obs: 18, groups:  Lake_Name, 16
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)           0.01081    0.73633   0.015    0.988
## DreissenidsUninvaded -0.29741    0.79320  -0.375    0.708
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.699
redres<-simulateResiduals(redmodel)
plot(redres)
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.25. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.5. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.75. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.

testResiduals(redres)

## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.17329, p-value = 0.5924
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 1.1072, p-value = 0.528
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 18.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.17329, p-value = 0.5924
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 1.1072, p-value = 0.528
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 18.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
testZeroInflation(redres)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0.91051, p-value = 0.952
## alternative hypothesis: two.sided

Global Dataset: HT Blooms

HTBloom<-CSLAP[which(CSLAP$Bloom == "Bloom"), ]

#create df of only OW blooms, then use 10 ug/L microcystin to get HT blooms (DEC open water guideline)
OWHTBloom<-HTBloom[HTBloom$Info_Type == "OW",]
OWHTBloom$HTBloom <- ifelse(OWHTBloom$ESF_Microcystin >= 10.0, "HT Bloom", "No HT Bloom")

#create df of only SB blooms, then use 20 ug/L microcystin to get HT blooms (DEC shoreline guideline)
SBHTBloom<-HTBloom[HTBloom$Info_Type == "SB",]
SBHTBloom$HTBloom <- ifelse(SBHTBloom$ESF_Microcystin >= 20.0, "HT Bloom", "No HT Bloom")

#Separate based on invasion status and drop unused levels of Lake Name
I.SBHTBloom<-SBHTBloom[SBHTBloom$Dreissenids == "Invaded",]
I.SBHTBloom$Lake_Name<-droplevels(I.SBHTBloom$Lake_Name)

U.SBHTBloom<-SBHTBloom[SBHTBloom$Dreissenids == "Uninvaded",]
U.SBHTBloom$Lake_Name<-droplevels(U.SBHTBloom$Lake_Name)
#separate by invaded and uninvaded
I.HTblooms<-as.data.frame(table(I.SBHTBloom[,c("Lake_Name","HTBloom")]))
I.HTblooms$Dreissenids<-rep("Invaded", 8)

U.HTblooms<-as.data.frame(table(U.SBHTBloom[,c("Lake_Name","HTBloom")]))
U.HTblooms$Dreissenids<-rep("Uninvaded", 56)

#Make into one dataframe for analysis
HTblooms<-rbind(I.HTblooms, U.HTblooms)

#Remove 'no blooms' 
HTblooms<-HTblooms[HTblooms$HTBloom == "HT Bloom",]

AOV

HTfit<-aov(Freq ~ Dreissenids, data=HTblooms)
summary(HTfit)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Dreissenids  1   27.2   27.16   2.479  0.126
## Residuals   30  328.7   10.96
TukeyHSD(HTfit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Freq ~ Dreissenids, data = HTblooms)
## 
## $Dreissenids
##                        diff       lwr       upr     p adj
## Uninvaded-Invaded -2.785714 -6.399216 0.8277879 0.1258776

GLMER poisson

hist(HTblooms$Freq)

HTmodel<-glmer(Freq ~ Dreissenids + (1|Lake_Name), family=poisson(link=log), data=HTblooms)
summary(HTmodel)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name)
##    Data: HTblooms
## 
##      AIC      BIC   logLik deviance df.resid 
##     55.7     60.1    -24.9     49.7       29 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -0.14857 -0.04613 -0.04613 -0.04613  0.22154 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Lake_Name (Intercept) 29.59    5.439   
## Number of obs: 32, groups:  Lake_Name, 31
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)            -3.160      3.703  -0.853    0.393
## DreissenidsUninvaded   -2.929      3.086  -0.949    0.342
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.803
HTres<-simulateResiduals(HTmodel)
plot(HTres)

testResiduals(HTres)

## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.18589, p-value = 0.1929
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.00048173, p-value = 0.328
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 32.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.18589, p-value = 0.1929
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.00048173, p-value = 0.328
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 32.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
testZeroInflation(HTres)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 1.0312, p-value = 0.952
## alternative hypothesis: two.sided

Reduced Dataset: HT Blooms

redHTBloom<-redCSLAP[which(redCSLAP$Bloom == "Bloom"), ]

#create df of only OW blooms, then use 10 ug/L microcystin to get HT blooms (DEC open water guideline)
redOWHTBloom<-redHTBloom[redHTBloom$Info_Type == "OW",]
redOWHTBloom$HTBloom <- ifelse(redOWHTBloom$ESF_Microcystin >= 10.0, "HT Bloom", "No HT Bloom")

#create df of only SB blooms, then use 20 ug/L microcystin to get HT blooms (DEC shoreline guideline)
redSBHTBloom<-redHTBloom[redHTBloom$Info_Type == "SB",]
redSBHTBloom$HTBloom <- ifelse(redSBHTBloom$ESF_Microcystin >= 20.0, "HT Bloom", "No HT Bloom")

#Separate based on invasion status and drop unused levels of Lake Name
redI.SBHTBloom<-redSBHTBloom[redSBHTBloom$Dreissenids == "Invaded",]
redI.SBHTBloom$Lake_Name<-droplevels(redI.SBHTBloom$Lake_Name)

redU.SBHTBloom<-redSBHTBloom[redSBHTBloom$Dreissenids == "Uninvaded",]
redU.SBHTBloom$Lake_Name<-droplevels(redU.SBHTBloom$Lake_Name)
#separate by invaded and uninvaded
redI.HTblooms<-as.data.frame(table(redI.SBHTBloom[,c("Lake_Name","HTBloom")]))
redI.HTblooms$Dreissenids<-rep("Invaded", 8)

redU.HTblooms<-as.data.frame(table(redU.SBHTBloom[,c("Lake_Name","HTBloom")]))
redU.HTblooms$Dreissenids<-rep("Uninvaded", 6)

#Make into one dataframe for analysis
redHTblooms<-rbind(redI.HTblooms, redU.HTblooms)

#Remove 'no blooms' 
redHTblooms<-redHTblooms[redHTblooms$HTBloom == "HT Bloom",]

AOV

redHTfit<-aov(Freq ~ Dreissenids, data=redHTblooms)
summary(redHTfit)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Dreissenids  1  26.67   26.67   1.751  0.222
## Residuals    8 121.83   15.23
TukeyHSD(redHTfit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Freq ~ Dreissenids, data = redHTblooms)
## 
## $Dreissenids
##                        diff       lwr      upr     p adj
## Uninvaded-Invaded -3.333333 -9.142215 2.475548 0.2223148

GLMER poisson

hist(redHTblooms$Freq)

redHTmodel<-glmer(Freq ~ Dreissenids + (1|Lake_Name), family=poisson(link=log), data=redHTblooms)
summary(redHTmodel)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name)
##    Data: redHTblooms
## 
##      AIC      BIC   logLik deviance df.resid 
##     28.7     29.6    -11.4     22.7        7 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -0.50742 -0.20937 -0.20937  0.02782  0.85377 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  Lake_Name (Intercept) 3.781    1.944   
## Number of obs: 10, groups:  Lake_Name, 9
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)           -0.3834     1.3842  -0.277    0.782
## DreissenidsUninvaded  -2.5782     1.8428  -1.399    0.162
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.464
redHTres<-simulateResiduals(redHTmodel)
plot(redHTres)
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.25. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.5. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.75. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.

testResiduals(redHTres)

## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.13156, p-value = 0.9859
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.67525, p-value = 0.504
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 10.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.13156, p-value = 0.9859
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.67525, p-value = 0.504
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 10.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
testZeroInflation(redHTres)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 1.0069, p-value = 1
## alternative hypothesis: two.sided

Linear Regressions

Global Dataset

#Extracting average annual TP for each lake 
library(plyr)
avgTP<-ddply(OWCSLAP, c("LakeID", "Sample_Year"), summarize, 
             Mean = mean(TP, na.rm=TRUE))

#Extracting average annual TN:TP for each lake 
avgTNTP<-ddply(OWCSLAP, c("LakeID", "Sample_Year"), summarize, 
             Mean = mean(TN_TP, na.rm=TRUE))

#Taking only rows where there is a value for Microcystin 
Bloom<-CSLAP[!is.na(CSLAP$ESF_Microcystin),]

#Which rows are missing TP values? 
noTPbloom<-Bloom[is.na(Bloom$TP),]

#Which rows are missing TN:TP values? 
noTNTPbloom<-Bloom[is.na(Bloom$TN_TP),]

#merge average TP to noTPbloom data frame 
avgTPbloom<-merge(noTPbloom, avgTP, by=c("LakeID", "Sample_Year"), all.x=TRUE)

#merge average TN:TP to noTNTPbloom data frame 
avgTNTPbloom<-merge(noTNTPbloom, avgTNTP, by=c("LakeID", "Sample_Year"), all.x = TRUE)

#remove original TP column, and re-name the average TP column 
avgTPbloom<-avgTPbloom[, c(1:16,18:56)]
colnames(avgTPbloom)[colnames(avgTPbloom)=="Mean"] <- "TP"

#remove original TN:TP column, and re-name the average TN:TP column 
avgTNTPbloom<-avgTNTPbloom[, c(1:49, 51:56)]
colnames(avgTNTPbloom)[colnames(avgTNTPbloom)=="Mean"] <- "TN_TP"

#re-order columns 
avgTPbloom<-avgTPbloom[, c(1:16,55,17:54)]

#re-order columns 
avgTNTPbloom<-avgTNTPbloom[, c(1:49,55,50:54)]

#remove TN_TP from avgTPbloom and merge with avgTNTP bloom 
avgTPbloom<-avgTPbloom[,c(1:49,51:55)]
TNTPmerge<-avgTNTPbloom[,c(5,50)]
avgTPTNTPbloom<-merge(avgTPbloom, TNTPmerge, by="Sample_Name")

#re-order columns
avgTPTNTPbloom<-avgTPTNTPbloom[,c(1:49,55,50:54)]

#combine two data frames
hadTP<-Bloom[!is.na(Bloom$TP),]

completeBloom<-rbind(hadTP, avgTPTNTPbloom)


#Split by Info_Type
OWcompleteBloom<-completeBloom[completeBloom$Info_Type == "OW",]
SBcompleteBloom<-completeBloom[completeBloom$Info_Type == "SB",]

#split by invasion status 
OWinvadedBlooms<-OWcompleteBloom[OWcompleteBloom$Dreissenids == "Invaded",]
OWuninvadedBlooms<-OWcompleteBloom[OWcompleteBloom$Dreissenids == "Uninvaded",]

SBinvadedBlooms<-SBcompleteBloom[SBcompleteBloom$Dreissenids == "Invaded",]
SBuninvadedBlooms<-SBcompleteBloom[SBcompleteBloom$Dreissenids == "Uninvaded",]

TP and Microcystin

Open Water

#create linear models 
OWinvaded.lm<-lm(log(OWinvadedBlooms$ESF_Microcystin+.1)~log(OWinvadedBlooms$TP+.1))
OWuninvaded.lm<-lm(log(OWuninvadedBlooms$ESF_Microcystin+.1)~log(OWuninvadedBlooms$TP+.1))

summary(OWinvaded.lm)
## 
## Call:
## lm(formula = log(OWinvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWinvadedBlooms$TP + 
##     0.1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48252 -0.17973  0.04444  0.11749  0.78628 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                     8.6021     2.0963   4.103  0.00175 **
## log(OWinvadedBlooms$TP + 0.1)   4.2365     0.9805   4.321  0.00121 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3225 on 11 degrees of freedom
## Multiple R-squared:  0.6293, Adjusted R-squared:  0.5956 
## F-statistic: 18.67 on 1 and 11 DF,  p-value: 0.001213
summary(OWuninvaded.lm)
## 
## Call:
## lm(formula = log(OWuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWuninvadedBlooms$TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8498 -0.3228 -0.1828  0.0744  4.3311 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)                       -4.072      3.407  -1.195    0.235
## log(OWuninvadedBlooms$TP + 0.1)   -1.642      1.557  -1.054    0.295
## 
## Residual standard error: 0.6902 on 87 degrees of freedom
## Multiple R-squared:  0.01261,    Adjusted R-squared:  0.001258 
## F-statistic: 1.111 on 1 and 87 DF,  p-value: 0.2948
OWinvaded.lm.sum<-summary(OWinvaded.lm)
OWuninvaded.lm.sum<-summary(OWuninvaded.lm)
ggplot(OWcompleteBloom, aes(x=TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) + 
  geom_smooth(data=subset(OWcompleteBloom, Dreissenids == "Invaded" ),
               aes(x=TP, y=ESF_Microcystin, color=Dreissenids), method=lm, se=FALSE)+
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
  scale_color_grey()
## `geom_smooth()` using formula 'y ~ x'

ggsave("./output_figures/Fig 2-1 Global OW.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'

Shoreline Bloom

#create linear models 
SBinvaded.lm<-lm(log(SBinvadedBlooms$ESF_Microcystin+.1)~log(SBinvadedBlooms$TP+.1))
SBuninvaded.lm<-lm(log(SBuninvadedBlooms$ESF_Microcystin+.1)~log(SBuninvadedBlooms$TP+.1))

summary(SBinvaded.lm)
## 
## Call:
## lm(formula = log(SBinvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBinvadedBlooms$TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2455 -0.7569  0.0850  0.8098  2.3340 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     19.834     16.000   1.240    0.232
## log(SBinvadedBlooms$TP + 0.1)    7.589      7.386   1.027    0.319
## 
## Residual standard error: 1.431 on 17 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.05847,    Adjusted R-squared:  0.003082 
## F-statistic: 1.056 on 1 and 17 DF,  p-value: 0.3186
summary(SBuninvaded.lm)
## 
## Call:
## lm(formula = log(SBuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBuninvadedBlooms$TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1845 -1.8375 -0.3783  1.8765  5.8238 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)                        34.79      35.71   0.974    0.340
## log(SBuninvadedBlooms$TP + 0.1)    13.95      16.52   0.845    0.407
## 
## Residual standard error: 2.653 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0314, Adjusted R-squared:  -0.01262 
## F-statistic: 0.7133 on 1 and 22 DF,  p-value: 0.4074
SBinvaded.lm.sum<-summary(SBinvaded.lm)
SBuninvaded.lm.sum<-summary(SBuninvaded.lm)
ggplot(SBcompleteBloom, aes(x=TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) + 
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
  scale_color_grey()
## Warning: Removed 3 rows containing missing values (geom_point).

ggsave("./output_figures/Fig 2-1 Global SB.png")
## Saving 7 x 5 in image
## Warning: Removed 3 rows containing missing values (geom_point).

TN:TP and Microcystin

Open Water

#create linear models 
OWTNTP_invaded.lm<-lm(log(OWinvadedBlooms$ESF_Microcystin+.1)~log(OWinvadedBlooms$TN_TP+.1))
OWTNTP_uninvaded.lm<-lm(log(OWuninvadedBlooms$ESF_Microcystin+.1)~log(OWuninvadedBlooms$TN_TP+.1))

summary(OWTNTP_invaded.lm)
## 
## Call:
## lm(formula = log(OWinvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWinvadedBlooms$TN_TP + 
##     0.1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37473 -0.23835 -0.22292 -0.06378  1.11484 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                        0.5400     0.7164   0.754    0.467
## log(OWinvadedBlooms$TN_TP + 0.1)  -0.2916     0.2077  -1.404    0.188
## 
## Residual standard error: 0.4878 on 11 degrees of freedom
## Multiple R-squared:  0.152,  Adjusted R-squared:  0.07489 
## F-statistic: 1.971 on 1 and 11 DF,  p-value: 0.1879
summary(OWTNTP_uninvaded.lm)
## 
## Call:
## lm(formula = log(OWuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWuninvadedBlooms$TN_TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8382 -0.2960 -0.2138  0.0185  4.3962 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)                         -0.8216     0.5559  -1.478    0.143
## log(OWuninvadedBlooms$TN_TP + 0.1)   0.1014     0.1626   0.624    0.534
## 
## Residual standard error: 0.6962 on 86 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.004504,   Adjusted R-squared:  -0.007072 
## F-statistic: 0.3891 on 1 and 86 DF,  p-value: 0.5344
OWTNTP_invaded.lm.sum<-summary(OWTNTP_invaded.lm)
OWTNTP_uninvaded.lm.sum<-summary(OWTNTP_uninvaded.lm)
ggplot(OWcompleteBloom, aes(x=TN_TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) +
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
  scale_color_grey()
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave("./output_figures/Fig 2-3 Global OW.png")
## Saving 7 x 5 in image
## Warning: Removed 1 rows containing missing values (geom_point).

Shoreline Bloom

#create linear models 
SBTNTP_invaded.lm<-lm(log(SBinvadedBlooms$ESF_Microcystin+.1)~log(SBinvadedBlooms$TN_TP+.1))
SBTNTP_uninvaded.lm<-lm(log(SBuninvadedBlooms$ESF_Microcystin+.1)~log(SBuninvadedBlooms$TN_TP+.1))

summary(SBTNTP_invaded.lm)
## 
## Call:
## lm(formula = log(SBinvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBinvadedBlooms$TN_TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5667 -0.6502  0.1916  0.7317  2.4407 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                        5.8364     2.3907   2.441   0.0259 *
## log(SBinvadedBlooms$TN_TP + 0.1)  -0.5496     0.5338  -1.030   0.3176  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.431 on 17 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.0587, Adjusted R-squared:  0.003334 
## F-statistic:  1.06 on 1 and 17 DF,  p-value: 0.3176
summary(SBTNTP_uninvaded.lm)
## 
## Call:
## lm(formula = log(SBuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBuninvadedBlooms$TN_TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7351 -1.2655 -0.2051  1.7642  5.8474 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)                          14.229      8.627   1.649    0.113
## log(SBuninvadedBlooms$TN_TP + 0.1)   -2.817      2.529  -1.114    0.277
## 
## Residual standard error: 2.622 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.05339,    Adjusted R-squared:  0.01036 
## F-statistic: 1.241 on 1 and 22 DF,  p-value: 0.2774
SBTNTP_invaded.lm.sum<-summary(SBTNTP_invaded.lm)
SBTNTP_uninvaded.lm.sum<-summary(SBTNTP_uninvaded.lm)
ggplot(SBcompleteBloom, aes(x=TN_TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) +
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
  scale_color_grey()
## Warning: Removed 3 rows containing missing values (geom_point).

ggsave("./output_figures/Fig 2-3 Global SB.png")
## Saving 7 x 5 in image
## Warning: Removed 3 rows containing missing values (geom_point).

Chlorophyll-a and Microcystin

Open Water

#create linear models
OWchl.invaded.lm<-lm(log(OWinvadedBlooms$ESF_Microcystin+.1)~log(OWinvadedBlooms$ESF_FP_Chl.a+.1))
OWchl.uninvaded.lm<-lm(log(OWuninvadedBlooms$ESF_Microcystin+.1)~log(OWuninvadedBlooms$ESF_FP_Chl.a+.1))

summary(OWchl.invaded.lm)
## 
## Call:
## lm(formula = log(OWinvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWinvadedBlooms$ESF_FP_Chl.a + 
##     0.1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44372 -0.13271 -0.04248  0.02934  1.07496 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                              -0.6270     0.1375  -4.560  0.00104 **
## log(OWinvadedBlooms$ESF_FP_Chl.a + 0.1)   0.1602     0.1426   1.124  0.28740   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3942 on 10 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1121, Adjusted R-squared:  0.02332 
## F-statistic: 1.263 on 1 and 10 DF,  p-value: 0.2874
summary(OWchl.uninvaded.lm)
## 
## Call:
## lm(formula = log(OWuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWuninvadedBlooms$ESF_FP_Chl.a + 
##     0.1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80127 -0.23851 -0.15538 -0.00707  2.03637 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                               -0.50704    0.06405  -7.916 1.37e-11
## log(OWuninvadedBlooms$ESF_FP_Chl.a + 0.1) -0.03765    0.04618  -0.815    0.417
##                                              
## (Intercept)                               ***
## log(OWuninvadedBlooms$ESF_FP_Chl.a + 0.1)    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.501 on 78 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.008452,   Adjusted R-squared:  -0.00426 
## F-statistic: 0.6649 on 1 and 78 DF,  p-value: 0.4173
OWchl.invaded.lm.sum<-summary(OWchl.invaded.lm)
OWchl.uninvaded.lm.sum<-summary(OWchl.uninvaded.lm)
ggplot(OWcompleteBloom, aes(x=ESF_FP_Chl.a, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) + 
  geom_smooth(method=lm, se=FALSE)+
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10') + 
  scale_color_grey()
## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous x-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

ggsave("./output_figures/Fig 2-2 Global OW.png")
## Saving 7 x 5 in image
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

Shoreline Bloom

#create linear models
SBchl.invaded.lm<-lm(log(SBinvadedBlooms$ESF_Microcystin+.1)~log(SBinvadedBlooms$ESF_FP_Chl.a+.1))
SBchl.uninvaded.lm<-lm(log(SBuninvadedBlooms$ESF_Microcystin+.1)~log(SBuninvadedBlooms$ESF_FP_Chl.a+.1))

summary(SBchl.invaded.lm)
## 
## Call:
## lm(formula = log(SBinvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBinvadedBlooms$ESF_FP_Chl.a + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2713 -0.9313  0.2731  1.0336  2.3892 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                               2.7787     1.4482   1.919    0.071 .
## log(SBinvadedBlooms$ESF_FP_Chl.a + 0.1)   0.1318     0.2493   0.529    0.603  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.531 on 18 degrees of freedom
## Multiple R-squared:  0.01529,    Adjusted R-squared:  -0.03941 
## F-statistic: 0.2795 on 1 and 18 DF,  p-value: 0.6035
summary(SBchl.uninvaded.lm)
## 
## Call:
## lm(formula = log(SBuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBuninvadedBlooms$ESF_FP_Chl.a + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2030 -1.9212 -0.1592  1.7338  5.5123 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                 1.1730     2.4462   0.480    0.636
## log(SBuninvadedBlooms$ESF_FP_Chl.a + 0.1)   0.4346     0.2928   1.484    0.151
## 
## Residual standard error: 2.498 on 24 degrees of freedom
## Multiple R-squared:  0.08409,    Adjusted R-squared:  0.04593 
## F-statistic: 2.204 on 1 and 24 DF,  p-value: 0.1507
SBchl.invaded.lm.sum<-summary(SBchl.invaded.lm)
SBchl.uninvaded.lm.sum<-summary(SBchl.uninvaded.lm)
ggplot(SBcompleteBloom, aes(x=ESF_FP_Chl.a, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) + 
  geom_smooth(method=lm, se=FALSE)+
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10') + 
  scale_color_grey()
## `geom_smooth()` using formula 'y ~ x'

ggsave("./output_figures/Fig 2-2 Global SB.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'

Reduced Dataset

#Extracting average annual TP for each lake 
library(plyr)
redavgTP<-ddply(redOWCSLAP, c("LakeID", "Sample_Year"), summarize, 
             Mean = mean(TP, na.rm=TRUE))

#Extracting average annual TNTP for each lake 
redavgTNTP<-ddply(redOWCSLAP, c("LakeID", "Sample_Year"), summarize, 
             Mean = mean(TN_TP, na.rm=TRUE))

#Taking only rows where there is a value for microcystin
redBloom<-redCSLAP[!is.na(redCSLAP$ESF_Microcystin),]

#Which rows are missing TP values? 
rednoTPbloom<-redBloom[is.na(redBloom$TP),]

#Which rows are missing TNTP values? 
rednoTNTPbloom<-redBloom[is.na(redBloom$TN_TP),]

#merge average TP to noTPbloom data frame 
redavgTPbloom<-merge(rednoTPbloom, redavgTP, by=c("LakeID", "Sample_Year"), all.x=TRUE, all.y=FALSE)

#merge average TN:TP to noTPbloom data frame 
redavgTNTPbloom<-merge(rednoTNTPbloom, redavgTNTP, by=c("LakeID", "Sample_Year"), all.x=TRUE, all.y=FALSE)

#remove original TP column, and re-name the average TP column 
redavgTPbloom<-redavgTPbloom[, c(1:16, 18:56)]
colnames(redavgTPbloom)[colnames(redavgTPbloom)=="Mean"] <- "TP"

#remove original TNTP column, and re-name the average TNTP column 
redavgTNTPbloom<-redavgTNTPbloom[, c(1:49, 51:56)]
colnames(redavgTNTPbloom)[colnames(redavgTNTPbloom)=="Mean"] <- "TN_TP"

#re-order columns 
redavgTPbloom<-redavgTPbloom[, c(1:16,55,17:54)]

#re-order columns 
redavgTNTPbloom<-redavgTNTPbloom[, c(1:49,55,50:54)]

#combine two data frames
redhadTP<-redBloom[!is.na(redBloom$TP),]
redcompleteBloom<-rbind(redhadTP, redavgTPbloom)
redcompleteBloom<-rbind(redcompleteBloom, redavgTNTPbloom)

#split by invasion status 
redinvadedBlooms<-redcompleteBloom[redcompleteBloom$Dreissenids == "Invaded",]
reduninvadedBlooms<-redcompleteBloom[redcompleteBloom$Dreissenids == "Uninvaded",]

TP and Microcystin

#create linear models 
red.invaded.lm<-lm(log(redinvadedBlooms$ESF_Microcystin+.1)~log(redinvadedBlooms$TP+.1))
red.uninvaded.lm<-lm(log(reduninvadedBlooms$ESF_Microcystin+.1)~log(reduninvadedBlooms$TP+.1))

summary(red.invaded.lm)
## 
## Call:
## lm(formula = log(redinvadedBlooms$ESF_Microcystin + 0.1) ~ log(redinvadedBlooms$TP + 
##     0.1))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.702 -2.371  0.361  2.076  3.634 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)
## (Intercept)                    -0.06498   12.50675  -0.005    0.996
## log(redinvadedBlooms$TP + 0.1) -0.88251    5.80406  -0.152    0.880
## 
## Residual standard error: 2.266 on 30 degrees of freedom
##   (21 observations deleted due to missingness)
## Multiple R-squared:  0.0007701,  Adjusted R-squared:  -0.03254 
## F-statistic: 0.02312 on 1 and 30 DF,  p-value: 0.8802
summary(red.uninvaded.lm)
## 
## Call:
## lm(formula = log(reduninvadedBlooms$ESF_Microcystin + 0.1) ~ 
##     log(reduninvadedBlooms$TP + 0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7856 -0.7785 -0.1978  0.2512  5.1135 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                        -33.66      28.00  -1.202    0.252
## log(reduninvadedBlooms$TP + 0.1)   -15.39      12.85  -1.197    0.254
## 
## Residual standard error: 1.681 on 12 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1067, Adjusted R-squared:  0.03228 
## F-statistic: 1.434 on 1 and 12 DF,  p-value: 0.2543
red.invaded.lm.sum<-summary(red.invaded.lm)
red.uninvaded.lm.sum<-summary(red.uninvaded.lm)
ggplot(redcompleteBloom, aes(x=TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) + 
  geom_smooth(data=subset(redcompleteBloom, Dreissenids == "Uninvaded" ),
               aes(x=TP, y=ESF_Microcystin, color=Dreissenids), method=lm, se=FALSE)+
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) + 
  scale_color_grey()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

ggsave("./output_data/Fig 2-1 Reduced.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 22 rows containing missing values (geom_point).

TN:TP and Microcystin

#create linear models 
redTNTP_invaded.lm<-lm(log(redinvadedBlooms$ESF_Microcystin+.1)~log(redinvadedBlooms$TN_TP+.1))
redTNTP_uninvaded.lm<-lm(log(reduninvadedBlooms$ESF_Microcystin+.1)~log(reduninvadedBlooms$TN_TP+.1))

summary(redTNTP_invaded.lm)
## 
## Call:
## lm(formula = log(redinvadedBlooms$ESF_Microcystin + 0.1) ~ log(redinvadedBlooms$TN_TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4117 -1.4734 -0.5156  1.9925  4.0266 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                        -3.0059     1.8018  -1.668   0.1057  
## log(redinvadedBlooms$TN_TP + 0.1)   1.2073     0.4403   2.742   0.0102 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.027 on 30 degrees of freedom
##   (21 observations deleted due to missingness)
## Multiple R-squared:  0.2004, Adjusted R-squared:  0.1737 
## F-statistic: 7.518 on 1 and 30 DF,  p-value: 0.0102
summary(redTNTP_uninvaded.lm)
## 
## Call:
## lm(formula = log(reduninvadedBlooms$ESF_Microcystin + 0.1) ~ 
##     log(reduninvadedBlooms$TN_TP + 0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7063 -0.7244 -0.3029  0.0851  5.2936 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)                           -4.073      4.405  -0.925    0.373
## log(reduninvadedBlooms$TN_TP + 0.1)    1.184      1.321   0.896    0.388
## 
## Residual standard error: 1.722 on 12 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.06275,    Adjusted R-squared:  -0.01535 
## F-statistic: 0.8034 on 1 and 12 DF,  p-value: 0.3877
redTNTP_invaded.lm.sum<-summary(redTNTP_invaded.lm)
redTNTP_uninvaded.lm.sum<-summary(redTNTP_uninvaded.lm)
ggplot(redcompleteBloom, aes(x=TN_TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) + 
  geom_smooth(data=subset(redcompleteBloom, Dreissenids == "Invaded" ),
               aes(x=TN_TP, y=ESF_Microcystin, color=Dreissenids), method=lm, se=FALSE)+
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
  scale_color_grey()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

ggsave("./output_figures/Fig 2-3 Reduced.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (stat_smooth).

## Warning: Removed 22 rows containing missing values (geom_point).

Chlorophyll and Microcystin

#create linear models
red.chl.invaded.lm<-lm(log(redinvadedBlooms$ESF_Microcystin+.1)~log(redinvadedBlooms$ESF_FP_Chl.a+.1))
red.chl.uninvaded.lm<-lm(log(reduninvadedBlooms$ESF_Microcystin+.1)~log(reduninvadedBlooms$ESF_FP_Chl+.1))

summary(red.chl.invaded.lm)
## 
## Call:
## lm(formula = log(redinvadedBlooms$ESF_Microcystin + 0.1) ~ log(redinvadedBlooms$ESF_FP_Chl.a + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7338 -0.6808  0.1572  0.8108  2.6772 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              -0.21172    0.42764  -0.495    0.623
## log(redinvadedBlooms$ESF_FP_Chl.a + 0.1)  0.62604    0.08358   7.491 1.03e-09
##                                             
## (Intercept)                                 
## log(redinvadedBlooms$ESF_FP_Chl.a + 0.1) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.504 on 50 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5288, Adjusted R-squared:  0.5194 
## F-statistic: 56.11 on 1 and 50 DF,  p-value: 1.028e-09
summary(red.chl.uninvaded.lm)
## 
## Call:
## lm(formula = log(reduninvadedBlooms$ESF_Microcystin + 0.1) ~ 
##     log(reduninvadedBlooms$ESF_FP_Chl + 0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6438 -0.7245  0.1733  0.7195  1.3421 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              -0.78141    0.28565  -2.736   0.0181
## log(reduninvadedBlooms$ESF_FP_Chl + 0.1)  0.73772    0.09183   8.034  3.6e-06
##                                             
## (Intercept)                              *  
## log(reduninvadedBlooms$ESF_FP_Chl + 0.1) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9466 on 12 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.8432, Adjusted R-squared:  0.8301 
## F-statistic: 64.54 on 1 and 12 DF,  p-value: 3.602e-06
red.chl.invaded.lm.sum<-summary(red.chl.invaded.lm)
red.chl.uninvaded.lm.sum<-summary(red.chl.uninvaded.lm)
ggplot(redcompleteBloom, aes(x=ESF_FP_Chl.a, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) + 
  geom_smooth(method=lm, se=FALSE)+
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10') + 
  scale_color_grey()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

ggsave("./output_figures/Fig 2-2 Reduced.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).

## Warning: Removed 2 rows containing missing values (geom_point).